{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dictionary Trump Speeches\n",
    "\n",
    "* Author: \t\t\tValentina Gonzalez Rostani\n",
    "*  Contact mag384@pitt.edu\n",
    "* Date: August 19, 2024\n",
    "* Version: Python 3.8.17\n",
    "\n",
    "This jupyter notebook:\n",
    "\n",
    "- Creates dictionaries, and applies them to all Trump speeches in the sample.\n",
    "- These are inputs for the regression analysis conducted in 2_2_Speech_US.do\n",
    "\n",
    "\n",
    "\n",
    "Input:\n",
    "- Data/Text/Rallies_MSA.xlsx It is a xlsx file which contains data on the MSA and an identifier to merge with other files for the analysis. \n",
    "- Data/Text/Presidential... All txt files. From https://www.presidency.ucsb.edu/\n",
    "- Data/Text/Youtube... All txt files. Additional rallies obtained from Youtube.\n",
    "  \n",
    "\n",
    "Output:\n",
    "\n",
    "- Data/combined_df.csv   It is a csv file with the scores per each speech. "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import glob\n",
    "import os # directory\n",
    "from nltk.corpus import stopwords # nltk = package for human language data processing. \n",
    "import re # regular expressions\n",
    "\n",
    "# results eliminating stop words \n",
    "from collections import Counter"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Define Dictionary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "list_culture=['immigr','border','wall','heritage','values','culture','inclusion','enforcement', 'muslim', 'christian','islamic','gay', 'lesbian', 'lgbt','terrorism','undocumented' ,'way of life']\n",
    "\n",
    "list_worker=['factory','factories','job','employ','unemploy','worker','labor','wage','paid','fair','unfair','manufactur','union','steel','hardwork','pay','hire','decent','trade','autoworker','deindustrialization','industr','globalization','offshor']\n",
    "#worker\n",
    "list_worker2=[['middle', 'class'],['hard','work'],['bring','back'],['America','first'],['america','first'],['forgotten', 'man'], ['blue', 'collar'],['American','hands'],['american','hands'],['hire','america'],['buy','america'],['america','made'],['lai','off'],['people','work','at']]\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Presidendial txt files"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Define directory for presidential txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Current Directory: C:\\Users\\vgonz\\Dropbox\\Pitt\\OneDrive for Business\\Dissertation - Vale\\Paper 2 - Political-Economic Polarization\\Replication\\Do\n",
      "Parent Directory: C:\\Users\\vgonz\\Dropbox\\Pitt\\OneDrive for Business\\Dissertation - Vale\\Paper 2 - Political-Economic Polarization\\Replication\n",
      "New Working Directory: C:\\Users\\vgonz\\Dropbox\\Pitt\\OneDrive for Business\\Dissertation - Vale\\Paper 2 - Political-Economic Polarization\\Replication\\Data\\Text\\Presidential\n"
     ]
    }
   ],
   "source": [
    "# Get the current working directory (where the Jupyter notebook is located)\n",
    "current_directory = os.getcwd()\n",
    "print(\"Current Directory:\", current_directory)\n",
    "\n",
    "# Move one level up\n",
    "parent_directory = os.path.dirname(current_directory)\n",
    "\n",
    "# Change the working directory to the parent directory\n",
    "os.chdir(parent_directory)\n",
    "print(\"Parent Directory:\", os.getcwd())\n",
    "\n",
    "# Now, navigate three levels down to Data\\Text\\Presidential\n",
    "new_directory = os.path.join(parent_directory, \"Data\", \"Text\", \"Presidential\")\n",
    "\n",
    "# Change the working directory to the new directory\n",
    "os.chdir(new_directory)\n",
    "print(\"New Working Directory:\", os.getcwd())\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Functions to clean data "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cleanPunctNotHyphen(doc):\n",
    "    return re.sub(r'[\",\",\".\",\":\",\";\",\"'\",'\"']','', doc)\n",
    "def tokenize(doc):\n",
    "    return doc.split()\n",
    "def lowerCase(list):\n",
    "    return [word.lower() for word in list]\n",
    "def cleanUpSteps(doc):\n",
    "    return lowerCase(tokenize(cleanPunctNotHyphen(doc)))\n",
    "\n",
    "\n",
    "  \n",
    "    # Lets stem\n",
    "myPatts = [r\"(?<=.{2})ies\\Z\", r\"es\\Z\", r\"(?<![ha|wa|$i|$a])(s{1})\\Z\", r\"(?<=.{2})(ing)\\Z\", r\"ly\\Z\", r\"er\\Z\",r\"(?<=.{2})(ed)\\Z\"]\n",
    "# note 2: r\"(?<![$ha|$wa|$i|$a])(s{1})\\Z\" uses negative look behind to avoid trimming (\"has\", \"was\", \"is\", \"as\") it would leave wawas\n",
    "# other patterns make sure that words like ring and red do not get stemmed (the \"\".{2}\"\" part)\n",
    "# (?<=) is a positive lookbehind. it says to look to right of the pattern in the group (), and \n",
    "# do not match that patter, just match the regex after it. Here, I am making sure there are at least two characters\n",
    "# before we lop off an ing or ed.\n",
    "\n",
    "def myStemmer(myList: list, myPatts: list) -> list:\n",
    "    ''' Stem tokens in myList based on myPatts\n",
    "\n",
    "    Args:\n",
    "        myList -- list of tokens to stem\n",
    "        myPatts -- list of regex patterns to use for stemming\n",
    "                   each pattern, if matched, lead to a deletion\n",
    "\n",
    "    Returns:\n",
    "        A list of stemmed tokens\n",
    "\n",
    "    Note:\n",
    "        The order of patterns matters, eg taking `ing` off changes the end of the\n",
    "        token and thus the regex match\n",
    "    '''\n",
    "    out = myList\n",
    "    for pat in myPatts:\n",
    "        out = [re.sub(pat, \"\", i) for i in out]\n",
    "    return out\n",
    "\n",
    "stop = stopwords.words(\"english\")\n",
    "\n",
    "def stop_word_removal(x):\n",
    "    '''\n",
    "    Arg:\n",
    "        the idea is to get a text and remove some words that we specified outside. The input then is the text but also the list of words that you plan to remove, in this case stop words.\n",
    "    Result:\n",
    "        list of words without the list of words you decided to remove. \n",
    "    '''\n",
    "    # Step 1 \n",
    "    stop_words = stop\n",
    "    \n",
    "    # Step 2\n",
    "    return [word for word in x if not word.lower() in stop_words]\n",
    "\n",
    "# Function to convert  \n",
    "def listToString(s): \n",
    "    \n",
    "    # initialize an empty string\n",
    "    str1 = \" \" \n",
    "    \n",
    "    # return string  \n",
    "    return (str1.join(s))\n",
    "\n",
    "\n",
    "def removeSquareBrackets(words_list):\n",
    "    # Remove words enclosed in square brackets\n",
    "    words_list = [re.sub(r'\\[.*?\\]', '', word) for word in words_list]\n",
    "    # Remove any empty strings\n",
    "    words_list = [word for word in words_list if word]\n",
    "    return words_list\n",
    "\n",
    "\n",
    "# Define cleaning function\n",
    "def clean_text(sentence):\n",
    "    # Step 1: Remove punctuations except hyphens\n",
    "    sentence = cleanPunctNotHyphen(sentence)\n",
    "    # Step 2: Tokenize\n",
    "    sentence = tokenize(sentence)\n",
    "    # Step 3: Lowercase\n",
    "    sentence = lowerCase(sentence)\n",
    "    # Step 4: Remove stop words\n",
    "    #sentence = stop_word_removal(sentence)\n",
    "    \n",
    "    sentence = removeSquareBrackets(sentence)\n",
    "    # Step 5: Stem words\n",
    "    #sentence = myStemmer(sentence, myPatts)\n",
    "    # Step 6: Convert list to string\n",
    "    sentence = listToString(sentence)\n",
    "    return sentence"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Opening all files in the directory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a list to store the data\n",
    "data = []\n",
    "\n",
    "# Get a list of all .txt files in the current directory\n",
    "file_list = glob.glob(\"*.txt\")\n",
    "\n",
    "# Loop through each file and extract the data\n",
    "for file_name in file_list:\n",
    "    with open(file_name, \"r\") as file:\n",
    "        lines = file.readlines()\n",
    "        for line in lines[2:]:\n",
    "            # the 2 there is to eliminate the name and date. \n",
    "            # Remove any newline characters and append the data to the list\n",
    "            data.append((line.strip(), file_name))\n",
    "\n",
    "# Create a dataframe from the data\n",
    "df = pd.DataFrame(data, columns=[\"sentence\", \"file_name\"])\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Cleaning files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Apply cleaning function to 'sentence' column\n",
    "df['clean_text'] = df['sentence'].apply(clean_text)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Applying the dictionaries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['culture']=0\n",
    "df['culture2']=0\n",
    "\n",
    "df['pro_work']=0\n",
    "df['pro_work2']=0\n",
    "\n",
    "\n",
    "\n",
    "for i in list_culture:\n",
    "    df.loc[(df[\"sentence\"].str.contains(i)) & (~df[\"sentence\"].isna()), 'culture']=1\n",
    "    df.loc[(df[\"clean_text\"].str.contains(i)) & (~df[\"clean_text\"].isna()), 'culture2']=1\n",
    "    \n",
    "for i in list_worker:\n",
    "    df.loc[(df[\"sentence\"].str.contains(i)) & (~df[\"sentence\"].isna()), 'pro_work']=1\n",
    "    df.loc[(df[\"clean_text\"].str.contains(i)) & (~df[\"clean_text\"].isna()), 'pro_work2']=1\n",
    "for i in list_worker2:\n",
    "    df.loc[(df[\"sentence\"].str.contains(i[0])) & (df[\"sentence\"].str.contains(i[1])), 'pro_work']=1\n",
    "    df.loc[(df[\"clean_text\"].str.contains(i[0])) & (df[\"clean_text\"].str.contains(i[1])), 'pro_work2']=1\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['culture_count'] = df['clean_text'].str.count('|'.join(list_culture))\n",
    "\n",
    "df['worker_count'] = df['clean_text'].str.count('|'.join(list_worker))\n",
    "\n",
    "df['worker_count2'] = 0  # Initialize the count column to 0\n",
    "\n",
    "for sublist in list_worker2:\n",
    "    for word in sublist:\n",
    "        # Use str.contains() to check if each word in the sublist appears in each sentence\n",
    "        # If it does, add 1 to the worker_count column for that row\n",
    "        df.loc[(df['clean_text'].str.contains(word, case=False)) & (~df['clean_text'].isna()), 'worker_count2'] += 1\n",
    "df['pro_worker_count'] = df['worker_count']+df['worker_count2']\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "df['sentence_length'] = df['sentence'].apply(len)\n",
    "df['word_count'] = df['sentence'].str.split().apply(len)\n",
    "\n",
    "df = df[df['sentence_length'] != 0]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Summarizing data by speech"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['Total_sentences']=1\n",
    "sum=df[['culture_count', 'pro_worker_count','Total_sentences','word_count','file_name']].groupby(['file_name']).sum()\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Merging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "New Working Directory: C:\\Users\\vgonz\\Dropbox\\Pitt\\OneDrive for Business\\Dissertation - Vale\\Paper 2 - Political-Economic Polarization\\Replication\\Data\\Text\n"
     ]
    }
   ],
   "source": [
    "# Now, navigate three levels down to Data\\Text\\Presidential\n",
    "new_directory = os.path.join(parent_directory, \"Data\", \"Text\")\n",
    "\n",
    "# Change the working directory to the new directory\n",
    "os.chdir(new_directory)\n",
    "print(\"New Working Directory:\", os.getcwd())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "msa = pd.read_excel('Rallies_MSA.xlsx')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "merged_df = pd.merge(sum, msa, on='file_name')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>identifier</th>\n",
       "      <th>culture_count</th>\n",
       "      <th>pro_worker_count</th>\n",
       "      <th>Total_sentences</th>\n",
       "      <th>word_count</th>\n",
       "      <th>City</th>\n",
       "      <th>MSA</th>\n",
       "      <th>MSA_state</th>\n",
       "      <th>New</th>\n",
       "      <th>Date of Rally</th>\n",
       "      <th>...</th>\n",
       "      <th>Found (1 yes, 0 no)</th>\n",
       "      <th>Link</th>\n",
       "      <th>month</th>\n",
       "      <th>day</th>\n",
       "      <th>filename_notxt</th>\n",
       "      <th>file_name_youtube</th>\n",
       "      <th>file_name_youtubeee</th>\n",
       "      <th>Youtube</th>\n",
       "      <th>blank</th>\n",
       "      <th>Unique</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10_11_Panama City Beach_FL.txt</td>\n",
       "      <td>7</td>\n",
       "      <td>110</td>\n",
       "      <td>37</td>\n",
       "      <td>1124</td>\n",
       "      <td>Panama City Beach</td>\n",
       "      <td>Panama City</td>\n",
       "      <td>Panama City_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-10-11</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>https://www.presidency.ucsb.edu/documents/exce...</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>10_11_Panama City Beach_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10_13_Columbus_OH.txt</td>\n",
       "      <td>2</td>\n",
       "      <td>184</td>\n",
       "      <td>61</td>\n",
       "      <td>1615</td>\n",
       "      <td>Columbus</td>\n",
       "      <td>Columbus</td>\n",
       "      <td>Columbus_OH</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-10-13</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.presidency.ucsb.edu/documents/rema...</td>\n",
       "      <td>10</td>\n",
       "      <td>13</td>\n",
       "      <td>10_13_Columbus_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10_18_ColoradoSprings_CO.txt</td>\n",
       "      <td>25</td>\n",
       "      <td>323</td>\n",
       "      <td>106</td>\n",
       "      <td>4834</td>\n",
       "      <td>Colorado Springs</td>\n",
       "      <td>Colorado Springs</td>\n",
       "      <td>Colorado Springs_CO</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2023-10-18</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Remarks at the Norris-Penrose Event Center in ...</td>\n",
       "      <td>10</td>\n",
       "      <td>18</td>\n",
       "      <td>10_18_Colorado Springs_CO</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10_20_Delaware_OH.txt</td>\n",
       "      <td>17</td>\n",
       "      <td>252</td>\n",
       "      <td>89</td>\n",
       "      <td>2059</td>\n",
       "      <td>Delaware</td>\n",
       "      <td>Columbus</td>\n",
       "      <td>Columbus_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-10-20</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Remarks at the Delaware County Fairgrounds in ...</td>\n",
       "      <td>10</td>\n",
       "      <td>20</td>\n",
       "      <td>10_20_Delaware_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10_21_Fletcher_NC.txt</td>\n",
       "      <td>16</td>\n",
       "      <td>242</td>\n",
       "      <td>90</td>\n",
       "      <td>1693</td>\n",
       "      <td>Fletcher</td>\n",
       "      <td>Asheville</td>\n",
       "      <td>Asheville_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-10-21</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Remarks at the WNC Agricultural Center's Davis...</td>\n",
       "      <td>10</td>\n",
       "      <td>21</td>\n",
       "      <td>10_21_Fletcher_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>10_21_Newtown_PA.txt</td>\n",
       "      <td>15</td>\n",
       "      <td>195</td>\n",
       "      <td>78</td>\n",
       "      <td>1678</td>\n",
       "      <td>Newtown Township</td>\n",
       "      <td>Philadelphia-Camden-Wilmington</td>\n",
       "      <td>Philadelphia-Camden-Wilmington_PA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Remarks at the Newtown Athletic Club Sports Tr...</td>\n",
       "      <td>10</td>\n",
       "      <td>21</td>\n",
       "      <td>10_21_Newtown Township_PA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>10_22_Gettysburgh_PA.txt</td>\n",
       "      <td>22</td>\n",
       "      <td>391</td>\n",
       "      <td>105</td>\n",
       "      <td>4525</td>\n",
       "      <td>Gettysburg</td>\n",
       "      <td>Gettysburg</td>\n",
       "      <td>Gettysburg_PA</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-10-22</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.presidency.ucsb.edu/documents/rema...</td>\n",
       "      <td>10</td>\n",
       "      <td>22</td>\n",
       "      <td>10_22_Gettysburgh_PA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>10_23_Naples_FL.txt</td>\n",
       "      <td>16</td>\n",
       "      <td>291</td>\n",
       "      <td>78</td>\n",
       "      <td>1820</td>\n",
       "      <td>Naples</td>\n",
       "      <td>Naples-Marco Island</td>\n",
       "      <td>Naples-Marco Island_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-10-23</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Remarks at the Collier County Fairgrounds in N...</td>\n",
       "      <td>10</td>\n",
       "      <td>23</td>\n",
       "      <td>10_23_Naples_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>10_26_Charlotte_NC.txt</td>\n",
       "      <td>11</td>\n",
       "      <td>321</td>\n",
       "      <td>58</td>\n",
       "      <td>2396</td>\n",
       "      <td>Charlotte</td>\n",
       "      <td>Charlotte-Concord-Gastonia</td>\n",
       "      <td>Charlotte-Concord-Gastonia_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10</td>\n",
       "      <td>26</td>\n",
       "      <td>10_26_Charlotte_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>tNcgUGm6BEw.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10_27_Springfield_OH.txt</td>\n",
       "      <td>19</td>\n",
       "      <td>286</td>\n",
       "      <td>98</td>\n",
       "      <td>3945</td>\n",
       "      <td>Springfield</td>\n",
       "      <td>Springfield</td>\n",
       "      <td>Springfield_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-10-27</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Remarks at the Champions Center Expo in Spring...</td>\n",
       "      <td>10</td>\n",
       "      <td>27</td>\n",
       "      <td>10_27_Springfield_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10_28_Manchester_NH.txt</td>\n",
       "      <td>17</td>\n",
       "      <td>370</td>\n",
       "      <td>117</td>\n",
       "      <td>5310</td>\n",
       "      <td>Manchester</td>\n",
       "      <td>Manchester-Nashua</td>\n",
       "      <td>Manchester-Nashua_NH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-10-28</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Remarks at Radisson Armory in Manchester, New ...</td>\n",
       "      <td>10</td>\n",
       "      <td>28</td>\n",
       "      <td>10_28_Manchester_NH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>10_29_Golden_CO.txt</td>\n",
       "      <td>1</td>\n",
       "      <td>45</td>\n",
       "      <td>42</td>\n",
       "      <td>809</td>\n",
       "      <td>Golden</td>\n",
       "      <td>Denver-Aurora-Lakewood</td>\n",
       "      <td>Denver-Aurora-Lakewood_CO</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-10-29</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Remarks at the Jeffco Fairgrounds Event Center...</td>\n",
       "      <td>10</td>\n",
       "      <td>29</td>\n",
       "      <td>10_29_Golden_CO</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>10_29_Phoenix_AZ.txt</td>\n",
       "      <td>15</td>\n",
       "      <td>286</td>\n",
       "      <td>92</td>\n",
       "      <td>2096</td>\n",
       "      <td>Phoenix</td>\n",
       "      <td>Phoenix-Mesa-Chandler</td>\n",
       "      <td>Phoenix-Mesa-Chandler_AZ</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Remarks at the Phoenix Convention Center in Ph...</td>\n",
       "      <td>10</td>\n",
       "      <td>29</td>\n",
       "      <td>10_29_Phoenix_AZ</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>10_31_Warren_MI.txt</td>\n",
       "      <td>12</td>\n",
       "      <td>372</td>\n",
       "      <td>85</td>\n",
       "      <td>2434</td>\n",
       "      <td>Warren</td>\n",
       "      <td>Detroit–Warren–Dearborn</td>\n",
       "      <td>Detroit–Warren–Dearborn_MI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Remarks at Macomb Community College South Camp...</td>\n",
       "      <td>10</td>\n",
       "      <td>31</td>\n",
       "      <td>10_31_Warren_MI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>11_02_Miami_FL.txt</td>\n",
       "      <td>24</td>\n",
       "      <td>356</td>\n",
       "      <td>143</td>\n",
       "      <td>4767</td>\n",
       "      <td>Miami</td>\n",
       "      <td>Miami-Fort Lauderdale-Pompano Beach</td>\n",
       "      <td>Miami-Fort Lauderdale-Pompano Beach_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Remarks at the Bayfront Park Amphitheater in M...</td>\n",
       "      <td>11</td>\n",
       "      <td>2</td>\n",
       "      <td>11_2_Miami_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>11_02_Orlando_FL.txt</td>\n",
       "      <td>14</td>\n",
       "      <td>313</td>\n",
       "      <td>134</td>\n",
       "      <td>3741</td>\n",
       "      <td>Orlando</td>\n",
       "      <td>Orlando-Kissimmee-Sanford</td>\n",
       "      <td>Orlando-Kissimmee-Sanford_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-11-02</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Remarks at the Central Florida Fairgrounds in ...</td>\n",
       "      <td>11</td>\n",
       "      <td>2</td>\n",
       "      <td>11_2_Orlando_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>11_07_Raleigh_NC.txt</td>\n",
       "      <td>13</td>\n",
       "      <td>369</td>\n",
       "      <td>218</td>\n",
       "      <td>6187</td>\n",
       "      <td>Raleigh</td>\n",
       "      <td>Raleigh-Cary</td>\n",
       "      <td>Raleigh-Cary_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Remarks at J.S Dorton Arena in Raleigh, North ...</td>\n",
       "      <td>11</td>\n",
       "      <td>7</td>\n",
       "      <td>11_7_Raleigh_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>11_09_NewYorkCity_NY.txt</td>\n",
       "      <td>0</td>\n",
       "      <td>143</td>\n",
       "      <td>60</td>\n",
       "      <td>1586</td>\n",
       "      <td>New York City</td>\n",
       "      <td>New York-Newark-Jersey City</td>\n",
       "      <td>New York-Newark-Jersey City_NY</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-11-09</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Accepting Election as the 45th President of th...</td>\n",
       "      <td>11</td>\n",
       "      <td>9</td>\n",
       "      <td>11_9_New York City_NY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>11_1_ValleyForge_PA.txt</td>\n",
       "      <td>2</td>\n",
       "      <td>210</td>\n",
       "      <td>68</td>\n",
       "      <td>2342</td>\n",
       "      <td>Valley Forge</td>\n",
       "      <td>Philadelphia-Camden-Wilmington</td>\n",
       "      <td>Philadelphia-Camden-Wilmington_PA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-11-01</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Remarks on Obamacare in Valley Forge, Pennsylv...</td>\n",
       "      <td>11</td>\n",
       "      <td>1</td>\n",
       "      <td>11_1_Valley Forge_PA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>6_13_Manchester_NH.txt</td>\n",
       "      <td>83</td>\n",
       "      <td>347</td>\n",
       "      <td>133</td>\n",
       "      <td>3042</td>\n",
       "      <td>Manchester</td>\n",
       "      <td>Manchester-Nashua</td>\n",
       "      <td>Manchester-Nashua_NH</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-06-13</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.presidency.ucsb.edu/documents/rema...</td>\n",
       "      <td>6</td>\n",
       "      <td>13</td>\n",
       "      <td>6_13_Manchester_NH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>6_22_NewYorkCity_NY.txt</td>\n",
       "      <td>28</td>\n",
       "      <td>478</td>\n",
       "      <td>178</td>\n",
       "      <td>3384</td>\n",
       "      <td>New York City</td>\n",
       "      <td>New York-Newark-Jersey City</td>\n",
       "      <td>New York-Newark-Jersey City_NY</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-06-22</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.presidency.ucsb.edu/documents/rema...</td>\n",
       "      <td>6</td>\n",
       "      <td>22</td>\n",
       "      <td>6_22_NewYorkCity_NY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>7_11_VirginigaBeach_VA.txt</td>\n",
       "      <td>6</td>\n",
       "      <td>298</td>\n",
       "      <td>128</td>\n",
       "      <td>2535</td>\n",
       "      <td>Virginia Beach</td>\n",
       "      <td>Virginia Beach-Norfolk-Newport News</td>\n",
       "      <td>Virginia Beach-Norfolk-Newport News_VA</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-07-11</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.presidency.ucsb.edu/documents/rema...</td>\n",
       "      <td>7</td>\n",
       "      <td>11</td>\n",
       "      <td>7_11_VirginigaBeach_VA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>7_16_NewYorkCity_NY.txt</td>\n",
       "      <td>14</td>\n",
       "      <td>209</td>\n",
       "      <td>72</td>\n",
       "      <td>4159</td>\n",
       "      <td>New York City</td>\n",
       "      <td>New York-Newark-Jersey City</td>\n",
       "      <td>New York-Newark-Jersey City_NY</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-07-16</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.presidency.ucsb.edu/documents/rema...</td>\n",
       "      <td>7</td>\n",
       "      <td>16</td>\n",
       "      <td>7_16_NewYorkCity_NY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>7_21_Cleveland_OH.txt</td>\n",
       "      <td>45</td>\n",
       "      <td>569</td>\n",
       "      <td>127</td>\n",
       "      <td>5133</td>\n",
       "      <td>Cleveland</td>\n",
       "      <td>Cleveland-Elyria</td>\n",
       "      <td>Cleveland-Elyria_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-07-21</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7</td>\n",
       "      <td>21</td>\n",
       "      <td>7_21_Cleveland_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>7_29_Denver_CO.txt</td>\n",
       "      <td>28</td>\n",
       "      <td>94</td>\n",
       "      <td>6</td>\n",
       "      <td>5446</td>\n",
       "      <td>Denver</td>\n",
       "      <td>Denver-Aurora-Lakewood</td>\n",
       "      <td>Denver-Aurora-Lakewood_CO</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.c-span.org/video/?413365-1/donald-...</td>\n",
       "      <td>7</td>\n",
       "      <td>29</td>\n",
       "      <td>7_29_Denver_CO</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>8_09_Wilmington_NC.txt</td>\n",
       "      <td>10</td>\n",
       "      <td>431</td>\n",
       "      <td>197</td>\n",
       "      <td>7382</td>\n",
       "      <td>Wilmington</td>\n",
       "      <td>Wilmington</td>\n",
       "      <td>Wilmington_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>https://www.presidency.ucsb.edu/documents/rema...</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>8_9_Wilmington_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>8_12_Erie_PA.txt</td>\n",
       "      <td>18</td>\n",
       "      <td>352</td>\n",
       "      <td>149</td>\n",
       "      <td>7793</td>\n",
       "      <td>Erie</td>\n",
       "      <td>Erie</td>\n",
       "      <td>Erie_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>12</td>\n",
       "      <td>8_12_Erie_PA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>8_15_Youngstown_OH.txt</td>\n",
       "      <td>65</td>\n",
       "      <td>273</td>\n",
       "      <td>79</td>\n",
       "      <td>4864</td>\n",
       "      <td>Youngstown</td>\n",
       "      <td>Youngstown-Warren-Boardman</td>\n",
       "      <td>Youngstown-Warren-Boardman_OH</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-08-15</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>15</td>\n",
       "      <td>8_15_Youngstown_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>8_16_WestBend_WI.txt</td>\n",
       "      <td>18</td>\n",
       "      <td>360</td>\n",
       "      <td>99</td>\n",
       "      <td>2906</td>\n",
       "      <td>West Bend</td>\n",
       "      <td>Milwaukee-Waukesha</td>\n",
       "      <td>Milwaukee-Waukesha_WI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-08-16</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>16</td>\n",
       "      <td>8_16_West Bend_WI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>8_18_Charlotte_NC.txt</td>\n",
       "      <td>34</td>\n",
       "      <td>427</td>\n",
       "      <td>118</td>\n",
       "      <td>3192</td>\n",
       "      <td>Charlotte</td>\n",
       "      <td>Charlotte-Concord-Gastonia</td>\n",
       "      <td>Charlotte-Concord-Gastonia_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-08-18</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>18</td>\n",
       "      <td>8_18_Charlotte_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>8_19_Dimondale_MI.txt</td>\n",
       "      <td>14</td>\n",
       "      <td>441</td>\n",
       "      <td>129</td>\n",
       "      <td>4265</td>\n",
       "      <td>Dimondale</td>\n",
       "      <td>Lansing-East Lansing</td>\n",
       "      <td>Lansing-East Lansing_MI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-08-19</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>19</td>\n",
       "      <td>8_19_Dimondale_MI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>8_23_Austin_TX.txt</td>\n",
       "      <td>18</td>\n",
       "      <td>343</td>\n",
       "      <td>120</td>\n",
       "      <td>2315</td>\n",
       "      <td>Austin</td>\n",
       "      <td>Austin-Round Rock-Georgetown</td>\n",
       "      <td>Austin-Round Rock-Georgetown_TX</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-08-23</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>23</td>\n",
       "      <td>8_23_Austin_TX</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>8_30_Everett_WA.txt</td>\n",
       "      <td>11</td>\n",
       "      <td>281</td>\n",
       "      <td>101</td>\n",
       "      <td>1957</td>\n",
       "      <td>Everett</td>\n",
       "      <td>Seattle-Tacoma-Bellevue</td>\n",
       "      <td>Seattle-Tacoma-Bellevue_WA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-08-30</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>30</td>\n",
       "      <td>8_30_Everett_WA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>8_31_Phoenix_AZ.txt</td>\n",
       "      <td>116</td>\n",
       "      <td>542</td>\n",
       "      <td>198</td>\n",
       "      <td>6851</td>\n",
       "      <td>Phoenix</td>\n",
       "      <td>Phoenix-Mesa-Chandler</td>\n",
       "      <td>Phoenix-Mesa-Chandler_AZ</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-08-31</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>31</td>\n",
       "      <td>8_31_Phoenix_AZ</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>8_5_Green Bay_WI.txt</td>\n",
       "      <td>39</td>\n",
       "      <td>457</td>\n",
       "      <td>213</td>\n",
       "      <td>9473</td>\n",
       "      <td>Green Bay</td>\n",
       "      <td>Green Bay</td>\n",
       "      <td>Green Bay_WI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>https://www.presidency.ucsb.edu/documents/rema...</td>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "      <td>8_5_Green Bay_WI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>8_8_Detroit_MI.txt</td>\n",
       "      <td>7</td>\n",
       "      <td>625</td>\n",
       "      <td>139</td>\n",
       "      <td>3613</td>\n",
       "      <td>Detroit</td>\n",
       "      <td>Detroit–Warren–Dearborn</td>\n",
       "      <td>Detroit–Warren–Dearborn_MI</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-08-08</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>https://www.presidency.ucsb.edu/documents/rema...</td>\n",
       "      <td>8</td>\n",
       "      <td>8</td>\n",
       "      <td>8_8_Detroit_MI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>9_01_Cincinnati_OH.txt</td>\n",
       "      <td>9</td>\n",
       "      <td>208</td>\n",
       "      <td>48</td>\n",
       "      <td>1262</td>\n",
       "      <td>Cincinnati</td>\n",
       "      <td>Cincinnati</td>\n",
       "      <td>Cincinnati_OH</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-09-01</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Remarks to the American Legion</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>9_1_Cincinnati_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>9_01_Wilmington_OH.txt</td>\n",
       "      <td>10</td>\n",
       "      <td>248</td>\n",
       "      <td>75</td>\n",
       "      <td>1477</td>\n",
       "      <td>Wilmington</td>\n",
       "      <td>Cincinnati</td>\n",
       "      <td>Cincinnati_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-09-01</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>9_1_Wilmington_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>9_03_Detroit_MI.txt</td>\n",
       "      <td>4</td>\n",
       "      <td>119</td>\n",
       "      <td>28</td>\n",
       "      <td>1640</td>\n",
       "      <td>Detroit</td>\n",
       "      <td>Detroit–Warren–Dearborn</td>\n",
       "      <td>Detroit–Warren–Dearborn_MI</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-09-03</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Remarks at Great Faith International Ministrie...</td>\n",
       "      <td>9</td>\n",
       "      <td>3</td>\n",
       "      <td>9_3_Detroit_MI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>9_06_Greenville_NC.txt</td>\n",
       "      <td>7</td>\n",
       "      <td>292</td>\n",
       "      <td>84</td>\n",
       "      <td>2109</td>\n",
       "      <td>Greenville</td>\n",
       "      <td>Greenville</td>\n",
       "      <td>Greenville_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-09-06</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>6</td>\n",
       "      <td>9_6_Greenville_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>9_07_Philadelphia_PA.txt</td>\n",
       "      <td>14</td>\n",
       "      <td>212</td>\n",
       "      <td>93</td>\n",
       "      <td>2381</td>\n",
       "      <td>Philadelphia</td>\n",
       "      <td>Philadelphia-Camden-Wilmington</td>\n",
       "      <td>Philadelphia-Camden-Wilmington_PA</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-09-07</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Remarks at the Union League of Philadelphia in...</td>\n",
       "      <td>9</td>\n",
       "      <td>7</td>\n",
       "      <td>9_7_Philadelphia_PA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>9_08_Cleveland_OH.txt</td>\n",
       "      <td>4</td>\n",
       "      <td>298</td>\n",
       "      <td>104</td>\n",
       "      <td>2716</td>\n",
       "      <td>Cleveland</td>\n",
       "      <td>Cleveland-Elyria</td>\n",
       "      <td>Cleveland-Elyria_OH</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-09-08</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Remarks at the Cleveland Arts and Social Scien...</td>\n",
       "      <td>9</td>\n",
       "      <td>8</td>\n",
       "      <td>9_8_Cleveland_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>9_09_Pensacola_FL.txt</td>\n",
       "      <td>9</td>\n",
       "      <td>306</td>\n",
       "      <td>111</td>\n",
       "      <td>2629</td>\n",
       "      <td>Pensacola</td>\n",
       "      <td>Pensacola-Ferry Pass-Brent</td>\n",
       "      <td>Pensacola-Ferry Pass-Brent_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-09-09</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>9</td>\n",
       "      <td>9_9_Pensacola_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>9_12_Asheville_NC.txt</td>\n",
       "      <td>6</td>\n",
       "      <td>325</td>\n",
       "      <td>91</td>\n",
       "      <td>1834</td>\n",
       "      <td>Asheville</td>\n",
       "      <td>Asheville</td>\n",
       "      <td>Asheville_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-09-12</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>12</td>\n",
       "      <td>9_12_Asheville_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>9_13_Aston_PA.txt</td>\n",
       "      <td>1</td>\n",
       "      <td>175</td>\n",
       "      <td>47</td>\n",
       "      <td>1454</td>\n",
       "      <td>Aston</td>\n",
       "      <td>Philadelphia-Camden-Wilmington</td>\n",
       "      <td>Philadelphia-Camden-Wilmington_PA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>9_13_Aston_PA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>9_13_Clive_IA.txt</td>\n",
       "      <td>8</td>\n",
       "      <td>323</td>\n",
       "      <td>69</td>\n",
       "      <td>1829</td>\n",
       "      <td>Clive</td>\n",
       "      <td>Des Moines-West Des Moines</td>\n",
       "      <td>Des Moines-West Des Moines_IA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2023-09-13</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>9_13_Clive_IA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>9_14_Canton_OH.txt</td>\n",
       "      <td>4</td>\n",
       "      <td>309</td>\n",
       "      <td>66</td>\n",
       "      <td>1737</td>\n",
       "      <td>Canton</td>\n",
       "      <td>Canton-Massillon</td>\n",
       "      <td>Canton-Massillon_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-09-14</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>14</td>\n",
       "      <td>9_14_Canton_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>9_15_NewYorkCity_NY.txt</td>\n",
       "      <td>4</td>\n",
       "      <td>478</td>\n",
       "      <td>114</td>\n",
       "      <td>3063</td>\n",
       "      <td>New York City</td>\n",
       "      <td>New York-Newark-Jersey City</td>\n",
       "      <td>New York-Newark-Jersey City_NY</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-09-15</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.presidency.ucsb.edu/documents/rema...</td>\n",
       "      <td>9</td>\n",
       "      <td>15</td>\n",
       "      <td>9_15_NewYorkCity_NY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>9_16_Miami_FL.txt</td>\n",
       "      <td>11</td>\n",
       "      <td>314</td>\n",
       "      <td>62</td>\n",
       "      <td>1581</td>\n",
       "      <td>Miami</td>\n",
       "      <td>Miami-Fort Lauderdale-Pompano Beach</td>\n",
       "      <td>Miami-Fort Lauderdale-Pompano Beach_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-09-16</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>16</td>\n",
       "      <td>9_16_Miami_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>9_17_Houston_TX.txt</td>\n",
       "      <td>23</td>\n",
       "      <td>145</td>\n",
       "      <td>38</td>\n",
       "      <td>1328</td>\n",
       "      <td>Houston</td>\n",
       "      <td>Houston-The Woodlands-Sugar Land</td>\n",
       "      <td>Houston-The Woodlands-Sugar Land_TX</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-09-17</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.presidency.ucsb.edu/documents/rema...</td>\n",
       "      <td>9</td>\n",
       "      <td>17</td>\n",
       "      <td>9_17_Houston_TX</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>9_20_High Point_NC.txt</td>\n",
       "      <td>32</td>\n",
       "      <td>249</td>\n",
       "      <td>73</td>\n",
       "      <td>1809</td>\n",
       "      <td>High Point</td>\n",
       "      <td>Greensboro-High Point</td>\n",
       "      <td>Greensboro-High Point_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-09-20</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>20</td>\n",
       "      <td>9_20_High Point_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>9_22_Chester Township_PA.txt</td>\n",
       "      <td>9</td>\n",
       "      <td>395</td>\n",
       "      <td>101</td>\n",
       "      <td>2601</td>\n",
       "      <td>Chester Township</td>\n",
       "      <td>Philadelphia-Camden-Wilmington</td>\n",
       "      <td>Philadelphia-Camden-Wilmington_PA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-09-22</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>22</td>\n",
       "      <td>9_22_Chester Township_PA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>9_22_Pittsburgh_PA.txt</td>\n",
       "      <td>7</td>\n",
       "      <td>337</td>\n",
       "      <td>86</td>\n",
       "      <td>2130</td>\n",
       "      <td>Pittsburgh</td>\n",
       "      <td>Pittsburgh</td>\n",
       "      <td>Pittsburgh_PA</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-09-22</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Remarks at the Shale Insight TM Conferencee at...</td>\n",
       "      <td>9</td>\n",
       "      <td>22</td>\n",
       "      <td>9_22_Pittsburgh_PA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>9_24_Roanoke_VA.txt</td>\n",
       "      <td>10</td>\n",
       "      <td>360</td>\n",
       "      <td>88</td>\n",
       "      <td>2196</td>\n",
       "      <td>Roanoke</td>\n",
       "      <td>Roanoke</td>\n",
       "      <td>Roanoke_VA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-09-24</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>24</td>\n",
       "      <td>9_24_Roanoke_VA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>9_28_Council Bluffs_IA.txt</td>\n",
       "      <td>12</td>\n",
       "      <td>339</td>\n",
       "      <td>91</td>\n",
       "      <td>2024</td>\n",
       "      <td>Council Bluffs</td>\n",
       "      <td>Omaha-Council Bluffs</td>\n",
       "      <td>Omaha-Council Bluffs_IA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-09-28</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>28</td>\n",
       "      <td>9_28_Council Bluffs_IA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>9_28_Waukesha_WI.txt</td>\n",
       "      <td>7</td>\n",
       "      <td>345</td>\n",
       "      <td>98</td>\n",
       "      <td>2126</td>\n",
       "      <td>Waukesha</td>\n",
       "      <td>Milwaukee-Waukesha</td>\n",
       "      <td>Milwaukee-Waukesha_WI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>28</td>\n",
       "      <td>9_28_Waukesha_WI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>9_29_Bedford_NH.txt</td>\n",
       "      <td>10</td>\n",
       "      <td>327</td>\n",
       "      <td>91</td>\n",
       "      <td>1873</td>\n",
       "      <td>Bedford</td>\n",
       "      <td>Manchester-Nashua</td>\n",
       "      <td>Manchester-Nashua_NH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-09-29</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>29</td>\n",
       "      <td>9_29_Bedford_NH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>9_30_Novi_MI.txt</td>\n",
       "      <td>10</td>\n",
       "      <td>394</td>\n",
       "      <td>115</td>\n",
       "      <td>2512</td>\n",
       "      <td>Novi</td>\n",
       "      <td>Detroit–Warren–Dearborn</td>\n",
       "      <td>Detroit–Warren–Dearborn_MI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-09-30</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>https://www.presidency.ucsb.edu/documents/rema...</td>\n",
       "      <td>9</td>\n",
       "      <td>30</td>\n",
       "      <td>9_30_Novi_MI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>58 rows × 26 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                        identifier  culture_count  pro_worker_count  \\\n",
       "0   10_11_Panama City Beach_FL.txt              7               110   \n",
       "1            10_13_Columbus_OH.txt              2               184   \n",
       "2     10_18_ColoradoSprings_CO.txt             25               323   \n",
       "3            10_20_Delaware_OH.txt             17               252   \n",
       "4            10_21_Fletcher_NC.txt             16               242   \n",
       "5             10_21_Newtown_PA.txt             15               195   \n",
       "6         10_22_Gettysburgh_PA.txt             22               391   \n",
       "7              10_23_Naples_FL.txt             16               291   \n",
       "8           10_26_Charlotte_NC.txt             11               321   \n",
       "9         10_27_Springfield_OH.txt             19               286   \n",
       "10         10_28_Manchester_NH.txt             17               370   \n",
       "11             10_29_Golden_CO.txt              1                45   \n",
       "12            10_29_Phoenix_AZ.txt             15               286   \n",
       "13             10_31_Warren_MI.txt             12               372   \n",
       "14              11_02_Miami_FL.txt             24               356   \n",
       "15            11_02_Orlando_FL.txt             14               313   \n",
       "16            11_07_Raleigh_NC.txt             13               369   \n",
       "17        11_09_NewYorkCity_NY.txt              0               143   \n",
       "18         11_1_ValleyForge_PA.txt              2               210   \n",
       "19          6_13_Manchester_NH.txt             83               347   \n",
       "20         6_22_NewYorkCity_NY.txt             28               478   \n",
       "21      7_11_VirginigaBeach_VA.txt              6               298   \n",
       "22         7_16_NewYorkCity_NY.txt             14               209   \n",
       "23           7_21_Cleveland_OH.txt             45               569   \n",
       "24              7_29_Denver_CO.txt             28                94   \n",
       "25          8_09_Wilmington_NC.txt             10               431   \n",
       "26                8_12_Erie_PA.txt             18               352   \n",
       "27          8_15_Youngstown_OH.txt             65               273   \n",
       "28            8_16_WestBend_WI.txt             18               360   \n",
       "29           8_18_Charlotte_NC.txt             34               427   \n",
       "30           8_19_Dimondale_MI.txt             14               441   \n",
       "31              8_23_Austin_TX.txt             18               343   \n",
       "32             8_30_Everett_WA.txt             11               281   \n",
       "33             8_31_Phoenix_AZ.txt            116               542   \n",
       "34            8_5_Green Bay_WI.txt             39               457   \n",
       "35              8_8_Detroit_MI.txt              7               625   \n",
       "36          9_01_Cincinnati_OH.txt              9               208   \n",
       "37          9_01_Wilmington_OH.txt             10               248   \n",
       "38             9_03_Detroit_MI.txt              4               119   \n",
       "39          9_06_Greenville_NC.txt              7               292   \n",
       "40        9_07_Philadelphia_PA.txt             14               212   \n",
       "41           9_08_Cleveland_OH.txt              4               298   \n",
       "42           9_09_Pensacola_FL.txt              9               306   \n",
       "43           9_12_Asheville_NC.txt              6               325   \n",
       "44               9_13_Aston_PA.txt              1               175   \n",
       "45               9_13_Clive_IA.txt              8               323   \n",
       "46              9_14_Canton_OH.txt              4               309   \n",
       "47         9_15_NewYorkCity_NY.txt              4               478   \n",
       "48               9_16_Miami_FL.txt             11               314   \n",
       "49             9_17_Houston_TX.txt             23               145   \n",
       "50          9_20_High Point_NC.txt             32               249   \n",
       "51    9_22_Chester Township_PA.txt              9               395   \n",
       "52          9_22_Pittsburgh_PA.txt              7               337   \n",
       "53             9_24_Roanoke_VA.txt             10               360   \n",
       "54      9_28_Council Bluffs_IA.txt             12               339   \n",
       "55            9_28_Waukesha_WI.txt              7               345   \n",
       "56             9_29_Bedford_NH.txt             10               327   \n",
       "57                9_30_Novi_MI.txt             10               394   \n",
       "\n",
       "    Total_sentences  word_count               City  \\\n",
       "0                37        1124  Panama City Beach   \n",
       "1                61        1615           Columbus   \n",
       "2               106        4834   Colorado Springs   \n",
       "3                89        2059           Delaware   \n",
       "4                90        1693           Fletcher   \n",
       "5                78        1678   Newtown Township   \n",
       "6               105        4525         Gettysburg   \n",
       "7                78        1820             Naples   \n",
       "8                58        2396          Charlotte   \n",
       "9                98        3945        Springfield   \n",
       "10              117        5310         Manchester   \n",
       "11               42         809             Golden   \n",
       "12               92        2096            Phoenix   \n",
       "13               85        2434             Warren   \n",
       "14              143        4767              Miami   \n",
       "15              134        3741            Orlando   \n",
       "16              218        6187            Raleigh   \n",
       "17               60        1586      New York City   \n",
       "18               68        2342       Valley Forge   \n",
       "19              133        3042         Manchester   \n",
       "20              178        3384      New York City   \n",
       "21              128        2535     Virginia Beach   \n",
       "22               72        4159      New York City   \n",
       "23              127        5133          Cleveland   \n",
       "24                6        5446             Denver   \n",
       "25              197        7382         Wilmington   \n",
       "26              149        7793               Erie   \n",
       "27               79        4864         Youngstown   \n",
       "28               99        2906          West Bend   \n",
       "29              118        3192          Charlotte   \n",
       "30              129        4265          Dimondale   \n",
       "31              120        2315             Austin   \n",
       "32              101        1957            Everett   \n",
       "33              198        6851            Phoenix   \n",
       "34              213        9473          Green Bay   \n",
       "35              139        3613            Detroit   \n",
       "36               48        1262         Cincinnati   \n",
       "37               75        1477         Wilmington   \n",
       "38               28        1640            Detroit   \n",
       "39               84        2109         Greenville   \n",
       "40               93        2381       Philadelphia   \n",
       "41              104        2716          Cleveland   \n",
       "42              111        2629          Pensacola   \n",
       "43               91        1834          Asheville   \n",
       "44               47        1454              Aston   \n",
       "45               69        1829              Clive   \n",
       "46               66        1737             Canton   \n",
       "47              114        3063      New York City   \n",
       "48               62        1581              Miami   \n",
       "49               38        1328            Houston   \n",
       "50               73        1809         High Point   \n",
       "51              101        2601   Chester Township   \n",
       "52               86        2130         Pittsburgh   \n",
       "53               88        2196            Roanoke   \n",
       "54               91        2024     Council Bluffs   \n",
       "55               98        2126           Waukesha   \n",
       "56               91        1873            Bedford   \n",
       "57              115        2512               Novi   \n",
       "\n",
       "                                    MSA  \\\n",
       "0                           Panama City   \n",
       "1                              Columbus   \n",
       "2                      Colorado Springs   \n",
       "3                              Columbus   \n",
       "4                             Asheville   \n",
       "5        Philadelphia-Camden-Wilmington   \n",
       "6                            Gettysburg   \n",
       "7                   Naples-Marco Island   \n",
       "8            Charlotte-Concord-Gastonia   \n",
       "9                           Springfield   \n",
       "10                    Manchester-Nashua   \n",
       "11               Denver-Aurora-Lakewood   \n",
       "12                Phoenix-Mesa-Chandler   \n",
       "13              Detroit–Warren–Dearborn   \n",
       "14  Miami-Fort Lauderdale-Pompano Beach   \n",
       "15            Orlando-Kissimmee-Sanford   \n",
       "16                         Raleigh-Cary   \n",
       "17          New York-Newark-Jersey City   \n",
       "18       Philadelphia-Camden-Wilmington   \n",
       "19                    Manchester-Nashua   \n",
       "20          New York-Newark-Jersey City   \n",
       "21  Virginia Beach-Norfolk-Newport News   \n",
       "22          New York-Newark-Jersey City   \n",
       "23                     Cleveland-Elyria   \n",
       "24               Denver-Aurora-Lakewood   \n",
       "25                           Wilmington   \n",
       "26                                 Erie   \n",
       "27           Youngstown-Warren-Boardman   \n",
       "28                   Milwaukee-Waukesha   \n",
       "29           Charlotte-Concord-Gastonia   \n",
       "30                 Lansing-East Lansing   \n",
       "31         Austin-Round Rock-Georgetown   \n",
       "32              Seattle-Tacoma-Bellevue   \n",
       "33                Phoenix-Mesa-Chandler   \n",
       "34                            Green Bay   \n",
       "35              Detroit–Warren–Dearborn   \n",
       "36                           Cincinnati   \n",
       "37                           Cincinnati   \n",
       "38              Detroit–Warren–Dearborn   \n",
       "39                           Greenville   \n",
       "40       Philadelphia-Camden-Wilmington   \n",
       "41                     Cleveland-Elyria   \n",
       "42           Pensacola-Ferry Pass-Brent   \n",
       "43                            Asheville   \n",
       "44       Philadelphia-Camden-Wilmington   \n",
       "45           Des Moines-West Des Moines   \n",
       "46                     Canton-Massillon   \n",
       "47          New York-Newark-Jersey City   \n",
       "48  Miami-Fort Lauderdale-Pompano Beach   \n",
       "49     Houston-The Woodlands-Sugar Land   \n",
       "50                Greensboro-High Point   \n",
       "51       Philadelphia-Camden-Wilmington   \n",
       "52                           Pittsburgh   \n",
       "53                              Roanoke   \n",
       "54                 Omaha-Council Bluffs   \n",
       "55                   Milwaukee-Waukesha   \n",
       "56                    Manchester-Nashua   \n",
       "57              Detroit–Warren–Dearborn   \n",
       "\n",
       "                                 MSA_state  New Date of Rally  ...  \\\n",
       "0                           Panama City_FL  NaN    2016-10-11  ...   \n",
       "1                              Columbus_OH  1.0    2016-10-13  ...   \n",
       "2                      Colorado Springs_CO  NaN    2023-10-18  ...   \n",
       "3                              Columbus_OH  NaN    2016-10-20  ...   \n",
       "4                             Asheville_NC  NaN    2016-10-21  ...   \n",
       "5        Philadelphia-Camden-Wilmington_PA  NaN           NaT  ...   \n",
       "6                            Gettysburg_PA  1.0    2016-10-22  ...   \n",
       "7                   Naples-Marco Island_FL  NaN    2016-10-23  ...   \n",
       "8            Charlotte-Concord-Gastonia_NC  NaN           NaT  ...   \n",
       "9                           Springfield_OH  NaN    2016-10-27  ...   \n",
       "10                    Manchester-Nashua_NH  NaN    2016-10-28  ...   \n",
       "11               Denver-Aurora-Lakewood_CO  NaN    2016-10-29  ...   \n",
       "12                Phoenix-Mesa-Chandler_AZ  NaN           NaT  ...   \n",
       "13              Detroit–Warren–Dearborn_MI  NaN           NaT  ...   \n",
       "14  Miami-Fort Lauderdale-Pompano Beach_FL  NaN           NaT  ...   \n",
       "15            Orlando-Kissimmee-Sanford_FL  NaN    2016-11-02  ...   \n",
       "16                         Raleigh-Cary_NC  NaN           NaT  ...   \n",
       "17          New York-Newark-Jersey City_NY  1.0    2016-11-09  ...   \n",
       "18       Philadelphia-Camden-Wilmington_PA  NaN    2016-11-01  ...   \n",
       "19                    Manchester-Nashua_NH  1.0    2016-06-13  ...   \n",
       "20          New York-Newark-Jersey City_NY  1.0    2016-06-22  ...   \n",
       "21  Virginia Beach-Norfolk-Newport News_VA  1.0    2016-07-11  ...   \n",
       "22          New York-Newark-Jersey City_NY  1.0    2016-07-16  ...   \n",
       "23                     Cleveland-Elyria_OH  NaN    2016-07-21  ...   \n",
       "24               Denver-Aurora-Lakewood_CO  NaN           NaT  ...   \n",
       "25                           Wilmington_NC  NaN           NaT  ...   \n",
       "26                                 Erie_OH  NaN           NaT  ...   \n",
       "27           Youngstown-Warren-Boardman_OH  1.0    2016-08-15  ...   \n",
       "28                   Milwaukee-Waukesha_WI  NaN    2016-08-16  ...   \n",
       "29           Charlotte-Concord-Gastonia_NC  NaN    2016-08-18  ...   \n",
       "30                 Lansing-East Lansing_MI  NaN    2016-08-19  ...   \n",
       "31         Austin-Round Rock-Georgetown_TX  NaN    2016-08-23  ...   \n",
       "32              Seattle-Tacoma-Bellevue_WA  NaN    2016-08-30  ...   \n",
       "33                Phoenix-Mesa-Chandler_AZ  NaN    2016-08-31  ...   \n",
       "34                            Green Bay_WI  NaN           NaT  ...   \n",
       "35              Detroit–Warren–Dearborn_MI  1.0    2016-08-08  ...   \n",
       "36                           Cincinnati_OH  1.0    2016-09-01  ...   \n",
       "37                           Cincinnati_OH  NaN    2016-09-01  ...   \n",
       "38              Detroit–Warren–Dearborn_MI  1.0    2016-09-03  ...   \n",
       "39                           Greenville_NC  NaN    2016-09-06  ...   \n",
       "40       Philadelphia-Camden-Wilmington_PA  1.0    2016-09-07  ...   \n",
       "41                     Cleveland-Elyria_OH  1.0    2016-09-08  ...   \n",
       "42           Pensacola-Ferry Pass-Brent_FL  NaN    2016-09-09  ...   \n",
       "43                            Asheville_NC  NaN    2016-09-12  ...   \n",
       "44       Philadelphia-Camden-Wilmington_PA  NaN           NaT  ...   \n",
       "45           Des Moines-West Des Moines_IA  NaN    2023-09-13  ...   \n",
       "46                     Canton-Massillon_OH  NaN    2016-09-14  ...   \n",
       "47          New York-Newark-Jersey City_NY  1.0    2016-09-15  ...   \n",
       "48  Miami-Fort Lauderdale-Pompano Beach_FL  NaN    2016-09-16  ...   \n",
       "49     Houston-The Woodlands-Sugar Land_TX  1.0    2016-09-17  ...   \n",
       "50                Greensboro-High Point_NC  NaN    2016-09-20  ...   \n",
       "51       Philadelphia-Camden-Wilmington_PA  NaN    2016-09-22  ...   \n",
       "52                           Pittsburgh_PA  1.0    2016-09-22  ...   \n",
       "53                              Roanoke_VA  NaN    2016-09-24  ...   \n",
       "54                 Omaha-Council Bluffs_IA  NaN    2016-09-28  ...   \n",
       "55                   Milwaukee-Waukesha_WI  NaN           NaT  ...   \n",
       "56                    Manchester-Nashua_NH  NaN    2016-09-29  ...   \n",
       "57              Detroit–Warren–Dearborn_MI  NaN    2016-09-30  ...   \n",
       "\n",
       "   Found (1 yes, 0 no)                                               Link  \\\n",
       "0                  1.0  https://www.presidency.ucsb.edu/documents/exce...   \n",
       "1                  NaN  https://www.presidency.ucsb.edu/documents/rema...   \n",
       "2                  NaN  Remarks at the Norris-Penrose Event Center in ...   \n",
       "3                  NaN  Remarks at the Delaware County Fairgrounds in ...   \n",
       "4                  NaN  Remarks at the WNC Agricultural Center's Davis...   \n",
       "5                  NaN  Remarks at the Newtown Athletic Club Sports Tr...   \n",
       "6                  NaN  https://www.presidency.ucsb.edu/documents/rema...   \n",
       "7                  NaN  Remarks at the Collier County Fairgrounds in N...   \n",
       "8                  NaN                                                NaN   \n",
       "9                  NaN  Remarks at the Champions Center Expo in Spring...   \n",
       "10                 NaN  Remarks at Radisson Armory in Manchester, New ...   \n",
       "11                 NaN  Remarks at the Jeffco Fairgrounds Event Center...   \n",
       "12                 NaN  Remarks at the Phoenix Convention Center in Ph...   \n",
       "13                 NaN  Remarks at Macomb Community College South Camp...   \n",
       "14                 NaN  Remarks at the Bayfront Park Amphitheater in M...   \n",
       "15                 NaN  Remarks at the Central Florida Fairgrounds in ...   \n",
       "16                 1.0  Remarks at J.S Dorton Arena in Raleigh, North ...   \n",
       "17                 1.0  Accepting Election as the 45th President of th...   \n",
       "18                 1.0  Remarks on Obamacare in Valley Forge, Pennsylv...   \n",
       "19                 NaN  https://www.presidency.ucsb.edu/documents/rema...   \n",
       "20                 NaN  https://www.presidency.ucsb.edu/documents/rema...   \n",
       "21                 NaN  https://www.presidency.ucsb.edu/documents/rema...   \n",
       "22                 NaN  https://www.presidency.ucsb.edu/documents/rema...   \n",
       "23                 1.0                                                NaN   \n",
       "24                 0.5  https://www.c-span.org/video/?413365-1/donald-...   \n",
       "25                 1.0  https://www.presidency.ucsb.edu/documents/rema...   \n",
       "26                 1.0                                                NaN   \n",
       "27                 1.0                                                NaN   \n",
       "28                 1.0                                                NaN   \n",
       "29                 1.0                                                NaN   \n",
       "30                 1.0                                                NaN   \n",
       "31                 1.0                                                NaN   \n",
       "32                 1.0                                                NaN   \n",
       "33                 1.0                                                NaN   \n",
       "34                 1.0  https://www.presidency.ucsb.edu/documents/rema...   \n",
       "35                 1.0  https://www.presidency.ucsb.edu/documents/rema...   \n",
       "36                 1.0                     Remarks to the American Legion   \n",
       "37                 1.0                                                NaN   \n",
       "38                 1.0  Remarks at Great Faith International Ministrie...   \n",
       "39                 1.0                                                NaN   \n",
       "40                 1.0  Remarks at the Union League of Philadelphia in...   \n",
       "41                 1.0  Remarks at the Cleveland Arts and Social Scien...   \n",
       "42                 1.0                                                NaN   \n",
       "43                 1.0                                                NaN   \n",
       "44                 1.0                                                NaN   \n",
       "45                 1.0                                                NaN   \n",
       "46                 1.0                                                NaN   \n",
       "47                 NaN  https://www.presidency.ucsb.edu/documents/rema...   \n",
       "48                 1.0                                                NaN   \n",
       "49                 NaN  https://www.presidency.ucsb.edu/documents/rema...   \n",
       "50                 1.0                                                NaN   \n",
       "51                 1.0                                                NaN   \n",
       "52                 1.0  Remarks at the Shale Insight TM Conferencee at...   \n",
       "53                 1.0                                                NaN   \n",
       "54                 1.0                                                NaN   \n",
       "55                 1.0                                                NaN   \n",
       "56                 1.0                                                NaN   \n",
       "57                 1.0  https://www.presidency.ucsb.edu/documents/rema...   \n",
       "\n",
       "   month day              filename_notxt file_name_youtube  \\\n",
       "0     10  11  10_11_Panama City Beach_FL               NaN   \n",
       "1     10  13           10_13_Columbus_OH               NaN   \n",
       "2     10  18   10_18_Colorado Springs_CO               NaN   \n",
       "3     10  20           10_20_Delaware_OH               NaN   \n",
       "4     10  21           10_21_Fletcher_NC               NaN   \n",
       "5     10  21   10_21_Newtown Township_PA               NaN   \n",
       "6     10  22        10_22_Gettysburgh_PA               NaN   \n",
       "7     10  23             10_23_Naples_FL               NaN   \n",
       "8     10  26          10_26_Charlotte_NC               NaN   \n",
       "9     10  27        10_27_Springfield_OH               NaN   \n",
       "10    10  28         10_28_Manchester_NH               NaN   \n",
       "11    10  29             10_29_Golden_CO               NaN   \n",
       "12    10  29            10_29_Phoenix_AZ               NaN   \n",
       "13    10  31             10_31_Warren_MI               NaN   \n",
       "14    11   2               11_2_Miami_FL               NaN   \n",
       "15    11   2             11_2_Orlando_FL               NaN   \n",
       "16    11   7             11_7_Raleigh_NC               NaN   \n",
       "17    11   9       11_9_New York City_NY               NaN   \n",
       "18    11   1        11_1_Valley Forge_PA               NaN   \n",
       "19     6  13          6_13_Manchester_NH               NaN   \n",
       "20     6  22         6_22_NewYorkCity_NY               NaN   \n",
       "21     7  11      7_11_VirginigaBeach_VA               NaN   \n",
       "22     7  16         7_16_NewYorkCity_NY               NaN   \n",
       "23     7  21           7_21_Cleveland_OH               NaN   \n",
       "24     7  29              7_29_Denver_CO               NaN   \n",
       "25     8   9           8_9_Wilmington_NC               NaN   \n",
       "26     8  12                8_12_Erie_PA               NaN   \n",
       "27     8  15          8_15_Youngstown_OH               NaN   \n",
       "28     8  16           8_16_West Bend_WI               NaN   \n",
       "29     8  18           8_18_Charlotte_NC               NaN   \n",
       "30     8  19           8_19_Dimondale_MI               NaN   \n",
       "31     8  23              8_23_Austin_TX               NaN   \n",
       "32     8  30             8_30_Everett_WA               NaN   \n",
       "33     8  31             8_31_Phoenix_AZ               NaN   \n",
       "34     8   5            8_5_Green Bay_WI               NaN   \n",
       "35     8   8              8_8_Detroit_MI               NaN   \n",
       "36     9   1           9_1_Cincinnati_OH               NaN   \n",
       "37     9   1           9_1_Wilmington_OH               NaN   \n",
       "38     9   3              9_3_Detroit_MI               NaN   \n",
       "39     9   6           9_6_Greenville_NC               NaN   \n",
       "40     9   7         9_7_Philadelphia_PA               NaN   \n",
       "41     9   8            9_8_Cleveland_OH               NaN   \n",
       "42     9   9            9_9_Pensacola_FL               NaN   \n",
       "43     9  12           9_12_Asheville_NC               NaN   \n",
       "44     9  13               9_13_Aston_PA               NaN   \n",
       "45     9  13               9_13_Clive_IA               NaN   \n",
       "46     9  14              9_14_Canton_OH               NaN   \n",
       "47     9  15         9_15_NewYorkCity_NY               NaN   \n",
       "48     9  16               9_16_Miami_FL               NaN   \n",
       "49     9  17             9_17_Houston_TX               NaN   \n",
       "50     9  20          9_20_High Point_NC               NaN   \n",
       "51     9  22    9_22_Chester Township_PA               NaN   \n",
       "52     9  22          9_22_Pittsburgh_PA               NaN   \n",
       "53     9  24             9_24_Roanoke_VA               NaN   \n",
       "54     9  28      9_28_Council Bluffs_IA               NaN   \n",
       "55     9  28            9_28_Waukesha_WI               NaN   \n",
       "56     9  29             9_29_Bedford_NH               NaN   \n",
       "57     9  30                9_30_Novi_MI               NaN   \n",
       "\n",
       "    file_name_youtubeee Youtube  blank  Unique  \n",
       "0                   NaN     NaN    NaN     NaN  \n",
       "1                   NaN     NaN    NaN     NaN  \n",
       "2                   NaN     NaN    NaN     NaN  \n",
       "3                   NaN     NaN    NaN     NaN  \n",
       "4                   NaN     NaN    NaN     NaN  \n",
       "5                   NaN     NaN    NaN     NaN  \n",
       "6                   NaN     NaN    NaN     NaN  \n",
       "7                   NaN     NaN    NaN     NaN  \n",
       "8       tNcgUGm6BEw.txt     1.0    1.0     NaN  \n",
       "9                   NaN     NaN    NaN     NaN  \n",
       "10                  NaN     NaN    NaN     NaN  \n",
       "11                  NaN     NaN    NaN     NaN  \n",
       "12                  NaN     NaN    NaN     NaN  \n",
       "13                  NaN     NaN    NaN     NaN  \n",
       "14                  NaN     NaN    NaN     NaN  \n",
       "15                  NaN     NaN    NaN     NaN  \n",
       "16                  NaN     NaN    NaN     NaN  \n",
       "17                  NaN     NaN    NaN     NaN  \n",
       "18                  NaN     NaN    NaN     NaN  \n",
       "19                  NaN     NaN    NaN     NaN  \n",
       "20                  NaN     NaN    NaN     NaN  \n",
       "21                  NaN     NaN    NaN     NaN  \n",
       "22                  NaN     NaN    NaN     NaN  \n",
       "23                  NaN     NaN    NaN     NaN  \n",
       "24                  NaN     NaN    NaN     NaN  \n",
       "25                  NaN     NaN    NaN     NaN  \n",
       "26                  NaN     NaN    NaN     NaN  \n",
       "27                  NaN     NaN    NaN     NaN  \n",
       "28                  NaN     NaN    NaN     NaN  \n",
       "29                  NaN     NaN    NaN     NaN  \n",
       "30                  NaN     NaN    NaN     NaN  \n",
       "31                  NaN     NaN    NaN     NaN  \n",
       "32                  NaN     NaN    NaN     NaN  \n",
       "33                  NaN     NaN    NaN     NaN  \n",
       "34                  NaN     NaN    NaN     NaN  \n",
       "35                  NaN     NaN    NaN     NaN  \n",
       "36                  NaN     NaN    NaN     NaN  \n",
       "37                  NaN     NaN    NaN     NaN  \n",
       "38                  NaN     NaN    NaN     NaN  \n",
       "39                  NaN     NaN    NaN     NaN  \n",
       "40                  NaN     NaN    NaN     NaN  \n",
       "41                  NaN     NaN    NaN     NaN  \n",
       "42                  NaN     NaN    NaN     NaN  \n",
       "43                  NaN     NaN    NaN     NaN  \n",
       "44                  NaN     NaN    NaN     NaN  \n",
       "45                  NaN     NaN    NaN     NaN  \n",
       "46                  NaN     NaN    NaN     NaN  \n",
       "47                  NaN     NaN    NaN     NaN  \n",
       "48                  NaN     NaN    NaN     NaN  \n",
       "49                  NaN     NaN    NaN     NaN  \n",
       "50                  NaN     NaN    NaN     NaN  \n",
       "51                  NaN     NaN    NaN     NaN  \n",
       "52                  NaN     NaN    NaN     NaN  \n",
       "53                  NaN     NaN    NaN     NaN  \n",
       "54                  NaN     NaN    NaN     NaN  \n",
       "55                  NaN     NaN    NaN     NaN  \n",
       "56                  NaN     NaN    NaN     NaN  \n",
       "57                  NaN     NaN    NaN     NaN  \n",
       "\n",
       "[58 rows x 26 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rename the 'file_name' column to 'identifier'\n",
    "column_renames = {'file_name': 'identifier'}\n",
    "merged_df = merged_df.rename(columns=column_renames)\n",
    "\n",
    "# Write the frame to 'trans.csv', resolved against the current working directory\n",
    "merged_df.to_csv('trans.csv')\n",
    "\n",
    "# Last expression of the cell: display the frame\n",
    "merged_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Frequent words"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('the', 7707), ('and', 5941), ('to', 5698), ('of', 3898), ('a', 2968), ('in', 2950), ('we', 2826), ('i', 2272), ('our', 2110), ('will', 2109), ('that', 1973), ('you', 1870), ('for', 1827), ('is', 1814), ('have', 1613), ('are', 1509), ('going', 1445), ('it', 1386), ('this', 1279), ('be', 1154), ('they', 1140), ('people', 1093), ('on', 921), ('country', 888), ('all', 841), ('with', 839), ('she', 826), ('were', 826), ('so', 815), ('hillary', 796), ('its', 772), ('but', 764), ('not', 764), ('â€“', 721), ('by', 705), ('american', 676), ('clinton', 669), ('who', 649), ('was', 621), ('jobs', 612), ('from', 606), ('has', 595), ('one', 579), ('their', 569), ('at', 548), ('as', 545), ('her', 537), ('what', 527), ('great', 509), ('do', 508)]\n"
     ]
    }
   ],
   "source": [
    "\n",
    "\n",
    "# Concatenate every entry of the clean_text column into one space-separated string\n",
    "all_texts = ' '.join(df['clean_text'])\n",
    "\n",
    "# Tokenize on whitespace and tally how often each token occurs\n",
    "words = all_texts.split()\n",
    "word_freq = Counter()\n",
    "word_freq.update(words)\n",
    "\n",
    "# Show the 50 most frequent tokens together with their counts\n",
    "top_words = word_freq.most_common(50)\n",
    "print(top_words)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Youtube"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Directory for Youtube"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "New Working Directory: C:\\Users\\vgonz\\Dropbox\\Pitt\\OneDrive for Business\\Dissertation - Vale\\Paper 2 - Political-Economic Polarization\\Replication\\Data\\Text\\Youtube\n"
     ]
    }
   ],
   "source": [
    "# Locate the YouTube transcripts folder: <parent_directory>/Data/Text/Youtube\n",
    "youtube_subfolders = (\"Data\", \"Text\", \"Youtube\")\n",
    "new_directory = os.path.join(parent_directory, *youtube_subfolders)\n",
    "\n",
    "# Switch the working directory to that folder and report where we ended up\n",
    "os.chdir(new_directory)\n",
    "current_directory = os.getcwd()\n",
    "print(\"New Working Directory:\", current_directory)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Putting together all txt files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Accumulates one (line_text, source_file_name) tuple per line read.\n",
    "data = []\n",
    "\n",
    "# All .txt files in the current working directory. glob returns matches in an\n",
    "# arbitrary, OS-dependent order, so sort them to get a reproducible row order.\n",
    "file_list = sorted(glob.glob(\"*.txt\"))\n",
    "\n",
    "# Read every file line by line. The encoding is stated explicitly because\n",
    "# open() otherwise falls back to the platform locale (cp1252 on Windows, UTF-8\n",
    "# on most other systems), which makes the loaded text differ between machines\n",
    "# and garbles non-ASCII punctuation. errors=\"replace\" keeps the load from\n",
    "# aborting on a stray undecodable byte.\n",
    "# NOTE(review): assumes the transcripts are UTF-8 encoded - confirm.\n",
    "for file_name in file_list:\n",
    "    with open(file_name, \"r\", encoding=\"utf-8\", errors=\"replace\") as file:\n",
    "        for line in file:\n",
    "            # strip() removes the trailing newline and surrounding whitespace\n",
    "            data.append((line.strip(), file_name))\n",
    "\n",
    "# One row per line of text, tagged with the file it came from.\n",
    "df = pd.DataFrame(data, columns=[\"sentence\", \"file_name\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Cleaning the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the notebook's clean_text() helper on every raw line and store the\n",
    "# result next to the original text in the 'clean_text' column.\n",
    "df['clean_text'] = df['sentence'].map(clean_text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Applying the dictionaries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Binary dictionary flags. Columns without a suffix are matched against the raw\n",
    "# 'sentence' text; columns ending in '2' are matched against 'clean_text'.\n",
    "for flag_column in ['culture', 'culture2', 'pro_work', 'pro_work2']:\n",
    "    df[flag_column] = 0\n",
    "\n",
    "raw_lines = df[\"sentence\"]\n",
    "clean_lines = df[\"clean_text\"]\n",
    "\n",
    "# Cultural dictionary: a row is flagged as soon as any one term is found in it.\n",
    "# str.contains interprets each term as a case-sensitive regular expression.\n",
    "for term in list_culture:\n",
    "    df.loc[raw_lines.str.contains(term) & raw_lines.notna(), 'culture'] = 1\n",
    "    df.loc[clean_lines.str.contains(term) & clean_lines.notna(), 'culture2'] = 1\n",
    "\n",
    "# Worker dictionary, single terms: same any-term rule as above.\n",
    "for term in list_worker:\n",
    "    df.loc[raw_lines.str.contains(term) & raw_lines.notna(), 'pro_work'] = 1\n",
    "    df.loc[clean_lines.str.contains(term) & clean_lines.notna(), 'pro_work2'] = 1\n",
    "\n",
    "# Worker dictionary, paired terms: the first two entries of each list_worker2\n",
    "# item must both be found in the same row for the row to be flagged.\n",
    "for pair in list_worker2:\n",
    "    first_term, second_term = pair[0], pair[1]\n",
    "    df.loc[raw_lines.str.contains(first_term) & raw_lines.str.contains(second_term), 'pro_work'] = 1\n",
    "    df.loc[clean_lines.str.contains(first_term) & clean_lines.str.contains(second_term), 'pro_work2'] = 1\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Occurrence counts on the cleaned text: each dictionary is collapsed into one\n",
    "# alternation regex (term1|term2|...) and str.count tallies its matches per row.\n",
    "culture_pattern = '|'.join(list_culture)\n",
    "worker_pattern = '|'.join(list_worker)\n",
    "df['culture_count'] = df['clean_text'].str.count(culture_pattern)\n",
    "df['worker_count'] = df['clean_text'].str.count(worker_pattern)\n",
    "\n",
    "# Words taken from the list_worker2 sublists: every word that is present in a\n",
    "# row (case-insensitive) adds exactly 1 to that row, however often it occurs.\n",
    "# NOTE(review): each word is scored on its own here, whereas the binary\n",
    "# pro_work flags require both words of a list_worker2 item - confirm that\n",
    "# this difference is intended.\n",
    "df['worker_count2'] = 0\n",
    "for word in (w for sublist in list_worker2 for w in sublist):\n",
    "    found = df['clean_text'].str.contains(word, case=False) & (~df['clean_text'].isna())\n",
    "    df.loc[found, 'worker_count2'] += 1\n",
    "\n",
    "# Overall pro-worker score = single-term matches + list_worker2 word hits.\n",
    "df['pro_worker_count'] = df['worker_count'].add(df['worker_count2'])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Size of every raw line: number of characters and number of\n",
    "# whitespace-separated tokens.\n",
    "df['sentence_length'] = df['sentence'].str.len()\n",
    "df['word_count'] = df['sentence'].str.split().str.len()\n",
    "\n",
    "# Drop blank lines. .copy() makes the result an independent DataFrame, so the\n",
    "# column assignments that follow do not write into a slice of the unfiltered\n",
    "# frame (which triggers pandas' SettingWithCopyWarning whenever rows are\n",
    "# actually removed).\n",
    "df = df[df['sentence_length'] != 0].copy()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every remaining row is one line of text; a constant 1 lets the group sum\n",
    "# below double as a per-file line count.\n",
    "df['Total_sentences']=1\n",
    "\n",
    "# Per-file totals of the dictionary counts, line count and word count,\n",
    "# indexed by file_name.\n",
    "# NOTE(review): the name `sum` shadows Python's built-in sum() for the rest of\n",
    "# the session. It is kept here because later cells presumably refer to it;\n",
    "# rename it together with those cells.\n",
    "count_columns = ['culture_count', 'pro_worker_count', 'Total_sentences', 'word_count']\n",
    "sum = df.groupby(['file_name'])[count_columns].sum()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Merging with MSA data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "New Working Directory: C:\\Users\\vgonz\\Dropbox\\Pitt\\OneDrive for Business\\Dissertation - Vale\\Paper 2 - Political-Economic Polarization\\Replication\\Data\\Text\n"
     ]
    }
   ],
   "source": [
    "# Build the path of the Data\\Text folder below parent_directory (a name this\n",
    "# cell expects to exist already).\n",
    "new_directory = os.path.join(parent_directory, \"Data\", \"Text\")\n",
    "\n",
    "# Change the working directory to the new directory, so that files in\n",
    "# Data\\Text can be opened by bare file name.\n",
    "os.chdir(new_directory)\n",
    "print(\"New Working Directory:\", os.getcwd())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the rally-level MSA workbook. The bare file name is resolved against\n",
    "# the current working directory.\n",
    "msa = pd.read_excel('Rallies_MSA.xlsx')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Expose the YouTube transcript file name (workbook column\n",
    "# 'file_name_youtubeee') under the name 'file_name', the column that\n",
    "# identifies each transcript in the speech data.\n",
    "msa = msa.assign(file_name=msa['file_name_youtubeee'])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>identifier</th>\n",
       "      <th>culture_count</th>\n",
       "      <th>pro_worker_count</th>\n",
       "      <th>Total_sentences</th>\n",
       "      <th>word_count</th>\n",
       "      <th>City</th>\n",
       "      <th>MSA</th>\n",
       "      <th>MSA_state</th>\n",
       "      <th>New</th>\n",
       "      <th>Date of Rally</th>\n",
       "      <th>...</th>\n",
       "      <th>Found (1 yes, 0 no)</th>\n",
       "      <th>Link</th>\n",
       "      <th>month</th>\n",
       "      <th>day</th>\n",
       "      <th>filename_notxt</th>\n",
       "      <th>file_name_youtube</th>\n",
       "      <th>file_name_youtubeee</th>\n",
       "      <th>Youtube</th>\n",
       "      <th>blank</th>\n",
       "      <th>Unique</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1cxw0HrFf00.txt</td>\n",
       "      <td>25</td>\n",
       "      <td>77</td>\n",
       "      <td>1</td>\n",
       "      <td>11312</td>\n",
       "      <td>Eau Claire</td>\n",
       "      <td>Eau Claire</td>\n",
       "      <td>Eau Claire_WI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-11-01</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=1cxw0HrFf00</td>\n",
       "      <td>11</td>\n",
       "      <td>1</td>\n",
       "      <td>11_1_Eau Claire_WI</td>\n",
       "      <td>1cxw0HrFf00</td>\n",
       "      <td>1cxw0HrFf00.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2HoQKX6upm0.txt</td>\n",
       "      <td>24</td>\n",
       "      <td>67</td>\n",
       "      <td>1</td>\n",
       "      <td>7166</td>\n",
       "      <td>Portland</td>\n",
       "      <td>Portland-South Portland</td>\n",
       "      <td>Portland-South Portland_ME</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-08-04</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=2HoQKX6upm0</td>\n",
       "      <td>8</td>\n",
       "      <td>4</td>\n",
       "      <td>8_4_Portland_ME</td>\n",
       "      <td>2HoQKX6upm0</td>\n",
       "      <td>2HoQKX6upm0.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2R3eHzOMVOk.txt</td>\n",
       "      <td>25</td>\n",
       "      <td>77</td>\n",
       "      <td>1</td>\n",
       "      <td>7096</td>\n",
       "      <td>Fairfield</td>\n",
       "      <td>Bridgeport-Stamford-Norwalk</td>\n",
       "      <td>Bridgeport-Stamford-Norwalk_CT</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-08-13</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=2R3eHzOMVOk</td>\n",
       "      <td>8</td>\n",
       "      <td>13</td>\n",
       "      <td>8_13_Fairfield_CT</td>\n",
       "      <td>2R3eHzOMVOk</td>\n",
       "      <td>2R3eHzOMVOk.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4SUYvWpGgEM.txt</td>\n",
       "      <td>53</td>\n",
       "      <td>143</td>\n",
       "      <td>1</td>\n",
       "      <td>14147</td>\n",
       "      <td>Atlanta</td>\n",
       "      <td>Atlanta-Sandy Springs-Alpharetta</td>\n",
       "      <td>Atlanta-Sandy Springs-Alpharetta_GA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-06-15</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=4SUYvWpGgEM</td>\n",
       "      <td>6</td>\n",
       "      <td>15</td>\n",
       "      <td>6_15_Atlanta_GA</td>\n",
       "      <td>4SUYvWpGgEM</td>\n",
       "      <td>4SUYvWpGgEM.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5e6vf02MbcI.txt</td>\n",
       "      <td>37</td>\n",
       "      <td>125</td>\n",
       "      <td>1</td>\n",
       "      <td>12817</td>\n",
       "      <td>Jacksonville</td>\n",
       "      <td>Jacksonville</td>\n",
       "      <td>Jacksonville_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=5e6vf02MbcI</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>8_3_Jacksonville_FL</td>\n",
       "      <td>5e6vf02MbcI</td>\n",
       "      <td>5e6vf02MbcI.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6QgzcPNrUU8_ab_channel=WorldBeatTV.txt</td>\n",
       "      <td>47</td>\n",
       "      <td>79</td>\n",
       "      <td>1</td>\n",
       "      <td>8019</td>\n",
       "      <td>Westfield</td>\n",
       "      <td>Indianapolis-Carmel-Anderson</td>\n",
       "      <td>Indianapolis-Carmel-Anderson_IN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-07-12</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=6QgzcPNrUU8&amp;ab...</td>\n",
       "      <td>7</td>\n",
       "      <td>12</td>\n",
       "      <td>7_12_Westfield_IN</td>\n",
       "      <td>6QgzcPNrUU8&amp;ab_channel=WorldBeatTV</td>\n",
       "      <td>6QgzcPNrUU8_ab_channel=WorldBeatTV.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7pZhhdJIWQg.txt</td>\n",
       "      <td>7</td>\n",
       "      <td>64</td>\n",
       "      <td>1</td>\n",
       "      <td>8943</td>\n",
       "      <td>Colorado Springs</td>\n",
       "      <td>Colorado Springs</td>\n",
       "      <td>Colorado Springs_CO</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-07-29</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=7pZhhdJIWQg</td>\n",
       "      <td>7</td>\n",
       "      <td>29</td>\n",
       "      <td>7_29_Colorado Springs_CO</td>\n",
       "      <td>7pZhhdJIWQg</td>\n",
       "      <td>7pZhhdJIWQg.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8y4rwXdz0I0.txt</td>\n",
       "      <td>43</td>\n",
       "      <td>79</td>\n",
       "      <td>1</td>\n",
       "      <td>13952</td>\n",
       "      <td>Selma</td>\n",
       "      <td>Raleigh-Cary</td>\n",
       "      <td>Raleigh-Cary_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=8y4rwXdz0I0</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>11_3_Selma_NC</td>\n",
       "      <td>8y4rwXdz0I0</td>\n",
       "      <td>8y4rwXdz0I0.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9ipj90ABxnY_ab_channel=ABCNews.txt</td>\n",
       "      <td>28</td>\n",
       "      <td>100</td>\n",
       "      <td>1</td>\n",
       "      <td>7747</td>\n",
       "      <td>Grand Rapids</td>\n",
       "      <td>Grand Rapids-Kentwood</td>\n",
       "      <td>Grand Rapids-Kentwood_MI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-10-31</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>https://www.youtube.com/watch?v=9ipj90ABxnY&amp;ab...</td>\n",
       "      <td>10</td>\n",
       "      <td>31</td>\n",
       "      <td>10_31_Grand Rapids_MI</td>\n",
       "      <td>9ipj90ABxnY&amp;ab_channel=ABCNews</td>\n",
       "      <td>9ipj90ABxnY_ab_channel=ABCNews.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>A3QBA4F1r_k.txt</td>\n",
       "      <td>21</td>\n",
       "      <td>47</td>\n",
       "      <td>1</td>\n",
       "      <td>3721</td>\n",
       "      <td>Leesburg</td>\n",
       "      <td>Washington-Arlington-Alexandria</td>\n",
       "      <td>Washington-Arlington-Alexandria_VA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=A3QBA4F1r_k</td>\n",
       "      <td>11</td>\n",
       "      <td>6</td>\n",
       "      <td>11_6_Leesburg_VA</td>\n",
       "      <td>A3QBA4F1r_k</td>\n",
       "      <td>A3QBA4F1r_k.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>BBPZIlj1Vf4.txt</td>\n",
       "      <td>18</td>\n",
       "      <td>76</td>\n",
       "      <td>1</td>\n",
       "      <td>4741</td>\n",
       "      <td>Toledo</td>\n",
       "      <td>Toledo</td>\n",
       "      <td>Toledo_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=BBPZIlj1Vf4</td>\n",
       "      <td>10</td>\n",
       "      <td>27</td>\n",
       "      <td>10_27_Toledo_OH</td>\n",
       "      <td>BBPZIlj1Vf4</td>\n",
       "      <td>BBPZIlj1Vf4.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>DLQyOn6cdME.txt</td>\n",
       "      <td>12</td>\n",
       "      <td>64</td>\n",
       "      <td>1</td>\n",
       "      <td>5035</td>\n",
       "      <td>Greensboro</td>\n",
       "      <td>Winston-Salem</td>\n",
       "      <td>Winston-Salem_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-10-14</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=DLQyOn6cdME</td>\n",
       "      <td>10</td>\n",
       "      <td>14</td>\n",
       "      <td>10_14_Greensboro_NC</td>\n",
       "      <td>DLQyOn6cdME</td>\n",
       "      <td>DLQyOn6cdME.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>G3d0a6UFNcE.txt</td>\n",
       "      <td>55</td>\n",
       "      <td>56</td>\n",
       "      <td>1</td>\n",
       "      <td>4938</td>\n",
       "      <td>Estero</td>\n",
       "      <td>Cape Coral-Fort Myers</td>\n",
       "      <td>Cape Coral-Fort Myers_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-09-19</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=G3d0a6UFNcE</td>\n",
       "      <td>9</td>\n",
       "      <td>19</td>\n",
       "      <td>9_19_Estero_FL</td>\n",
       "      <td>G3d0a6UFNcE</td>\n",
       "      <td>G3d0a6UFNcE.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>GhdPUPD7puI.txt</td>\n",
       "      <td>18</td>\n",
       "      <td>56</td>\n",
       "      <td>1</td>\n",
       "      <td>2352</td>\n",
       "      <td>Moon Township</td>\n",
       "      <td>Pittsburgh</td>\n",
       "      <td>Pittsburgh_PA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=GhdPUPD7puI</td>\n",
       "      <td>11</td>\n",
       "      <td>6</td>\n",
       "      <td>11_6_Moon Township_PA</td>\n",
       "      <td>GhdPUPD7puI</td>\n",
       "      <td>GhdPUPD7puI.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Iuw2yZ7jnw8.txt</td>\n",
       "      <td>12</td>\n",
       "      <td>87</td>\n",
       "      <td>1</td>\n",
       "      <td>3601</td>\n",
       "      <td>Laconia</td>\n",
       "      <td>Manchester-Nashua</td>\n",
       "      <td>Manchester-Nashua_NH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-09-15</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=Iuw2yZ7jnw8</td>\n",
       "      <td>9</td>\n",
       "      <td>15</td>\n",
       "      <td>9_15_Laconia_NH</td>\n",
       "      <td>Iuw2yZ7jnw8</td>\n",
       "      <td>Iuw2yZ7jnw8.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>JZFZmNHdIuA.txt</td>\n",
       "      <td>23</td>\n",
       "      <td>71</td>\n",
       "      <td>1</td>\n",
       "      <td>9441</td>\n",
       "      <td>Cincinnati</td>\n",
       "      <td>Cincinnati</td>\n",
       "      <td>Cincinnati_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-07-06</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=JZFZmNHdIuA</td>\n",
       "      <td>7</td>\n",
       "      <td>6</td>\n",
       "      <td>7_6_Cincinnati_OH</td>\n",
       "      <td>JZFZmNHdIuA</td>\n",
       "      <td>JZFZmNHdIuA.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>NwNGAcDVdsY.txt</td>\n",
       "      <td>28</td>\n",
       "      <td>112</td>\n",
       "      <td>1</td>\n",
       "      <td>7259</td>\n",
       "      <td>St. Clairsville</td>\n",
       "      <td>Weirton-Steubenville</td>\n",
       "      <td>Weirton-Steubenville_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-06-28</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=NwNGAcDVdsY</td>\n",
       "      <td>6</td>\n",
       "      <td>28</td>\n",
       "      <td>6_28_St. Clairsville_OH</td>\n",
       "      <td>NwNGAcDVdsY</td>\n",
       "      <td>NwNGAcDVdsY.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>QhDlYABiWvY.txt</td>\n",
       "      <td>17</td>\n",
       "      <td>70</td>\n",
       "      <td>1</td>\n",
       "      <td>4729</td>\n",
       "      <td>Colorado Springs</td>\n",
       "      <td>Colorado Springs</td>\n",
       "      <td>Colorado Springs_CO</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-09-17</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=QhDlYABiWvY</td>\n",
       "      <td>9</td>\n",
       "      <td>17</td>\n",
       "      <td>9_17_Colorado Springs_CO</td>\n",
       "      <td>QhDlYABiWvY</td>\n",
       "      <td>QhDlYABiWvY.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Wuy1M05DM38_ab_channel=ABCNews.txt</td>\n",
       "      <td>16</td>\n",
       "      <td>48</td>\n",
       "      <td>1</td>\n",
       "      <td>5157</td>\n",
       "      <td>Tampa</td>\n",
       "      <td>Tampa-St. Petersburg-Clearwater</td>\n",
       "      <td>Tampa-St. Petersburg-Clearwater_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-11-05</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=Wuy1M05DM38&amp;ab...</td>\n",
       "      <td>11</td>\n",
       "      <td>5</td>\n",
       "      <td>11_5_Tampa_FL</td>\n",
       "      <td>Wuy1M05DM38&amp;ab_channel=ABCNews</td>\n",
       "      <td>Wuy1M05DM38_ab_channel=ABCNews.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Wuy1M05DM38_t=3s.txt</td>\n",
       "      <td>16</td>\n",
       "      <td>48</td>\n",
       "      <td>1</td>\n",
       "      <td>5157</td>\n",
       "      <td>St. Augustine</td>\n",
       "      <td>Jacksonville</td>\n",
       "      <td>Jacksonville_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-10-24</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=Wuy1M05DM38&amp;t=3s</td>\n",
       "      <td>10</td>\n",
       "      <td>24</td>\n",
       "      <td>10_24_St. Augustine_FL</td>\n",
       "      <td>Wuy1M05DM38&amp;t=3s</td>\n",
       "      <td>Wuy1M05DM38_t=3s.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>XWu2nDNmreA.txt</td>\n",
       "      <td>34</td>\n",
       "      <td>68</td>\n",
       "      <td>1</td>\n",
       "      <td>8042</td>\n",
       "      <td>Windham</td>\n",
       "      <td>Boston-Cambridge-Newton</td>\n",
       "      <td>Boston-Cambridge-Newton_NH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-08-06</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=XWu2nDNmreA</td>\n",
       "      <td>8</td>\n",
       "      <td>6</td>\n",
       "      <td>8_6_Windham_NH</td>\n",
       "      <td>XWu2nDNmreA</td>\n",
       "      <td>XWu2nDNmreA.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>Yf2tnm_07Kw.txt</td>\n",
       "      <td>17</td>\n",
       "      <td>83</td>\n",
       "      <td>1</td>\n",
       "      <td>4618</td>\n",
       "      <td>Sterling Heights</td>\n",
       "      <td>Detroit–Warren–Dearborn</td>\n",
       "      <td>Detroit-Warren-Dearborn_MI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=Yf2tnm_07Kw</td>\n",
       "      <td>11</td>\n",
       "      <td>6</td>\n",
       "      <td>11_6_Sterling Heights_MI</td>\n",
       "      <td>Yf2tnm_07Kw</td>\n",
       "      <td>Yf2tnm_07Kw.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>YqDl4JtHAhU.txt</td>\n",
       "      <td>31</td>\n",
       "      <td>134</td>\n",
       "      <td>1</td>\n",
       "      <td>16168</td>\n",
       "      <td>Scranton</td>\n",
       "      <td>Scranton–Wilkes-Barre</td>\n",
       "      <td>Scranton-Wilkes-Barre_PA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=YqDl4JtHAhU</td>\n",
       "      <td>11</td>\n",
       "      <td>7</td>\n",
       "      <td>11_7_Scranton_PA</td>\n",
       "      <td>YqDl4JtHAhU</td>\n",
       "      <td>YqDl4JtHAhU.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>aMZsKq99hdk.txt</td>\n",
       "      <td>33</td>\n",
       "      <td>102</td>\n",
       "      <td>1</td>\n",
       "      <td>14713</td>\n",
       "      <td>Greeley</td>\n",
       "      <td>Greeley</td>\n",
       "      <td>Greeley_CO</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=aMZsKq99hdk</td>\n",
       "      <td>10</td>\n",
       "      <td>30</td>\n",
       "      <td>10_30_Greeley_CO</td>\n",
       "      <td>aMZsKq99hdk</td>\n",
       "      <td>aMZsKq99hdk.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>b3z7dIOAvEU.txt</td>\n",
       "      <td>10</td>\n",
       "      <td>51</td>\n",
       "      <td>1</td>\n",
       "      <td>3173</td>\n",
       "      <td>Wilmington</td>\n",
       "      <td>Wilmington</td>\n",
       "      <td>Wilmington_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=b3z7dIOAvEU</td>\n",
       "      <td>11</td>\n",
       "      <td>5</td>\n",
       "      <td>11_5_Wilmington_NC</td>\n",
       "      <td>b3z7dIOAvEU</td>\n",
       "      <td>b3z7dIOAvEU.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>b3z7dIOAvEU_t=13s.txt</td>\n",
       "      <td>10</td>\n",
       "      <td>51</td>\n",
       "      <td>1</td>\n",
       "      <td>3173</td>\n",
       "      <td>Wilmington</td>\n",
       "      <td>Cincinnati</td>\n",
       "      <td>Cincinnati_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=b3z7dIOAvEU&amp;t=13s</td>\n",
       "      <td>11</td>\n",
       "      <td>4</td>\n",
       "      <td>11_4_Wilmington_OH</td>\n",
       "      <td>b3z7dIOAvEU&amp;t=13s</td>\n",
       "      <td>b3z7dIOAvEU_t=13s.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>cZjTIVIFNOM_ab_channel=FactbaseVideos.txt</td>\n",
       "      <td>18</td>\n",
       "      <td>124</td>\n",
       "      <td>1</td>\n",
       "      <td>9579</td>\n",
       "      <td>Columbus</td>\n",
       "      <td>Columbus</td>\n",
       "      <td>Columbus_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-08-01</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=cZjTIVIFNOM&amp;ab...</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>8_1_Columbus_OH</td>\n",
       "      <td>cZjTIVIFNOM&amp;ab_channel=FactbaseVideos</td>\n",
       "      <td>cZjTIVIFNOM_ab_channel=FactbaseVideos.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>dFh73ll9DaY_ab_channel=LiveNOWfromFOX.txt</td>\n",
       "      <td>27</td>\n",
       "      <td>61</td>\n",
       "      <td>1</td>\n",
       "      <td>5662</td>\n",
       "      <td>Jacksonville</td>\n",
       "      <td>Jacksonville</td>\n",
       "      <td>Jacksonville_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-11-03</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=dFh73ll9DaY&amp;ab...</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>11_3_Jacksonville_FL</td>\n",
       "      <td>dFh73ll9DaY&amp;ab_channel=LiveNOWfromFOX</td>\n",
       "      <td>dFh73ll9DaY_ab_channel=LiveNOWfromFOX.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>h0vy5Myu5d4.txt</td>\n",
       "      <td>22</td>\n",
       "      <td>90</td>\n",
       "      <td>1</td>\n",
       "      <td>9516</td>\n",
       "      <td>Raleigh</td>\n",
       "      <td>Raleigh-Cary</td>\n",
       "      <td>Raleigh-Cary_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-07-05</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=h0vy5Myu5d4</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>7_5_Raleigh_NC</td>\n",
       "      <td>h0vy5Myu5d4</td>\n",
       "      <td>h0vy5Myu5d4.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>hjRO5MsJ9u8.txt</td>\n",
       "      <td>18</td>\n",
       "      <td>73</td>\n",
       "      <td>1</td>\n",
       "      <td>5426</td>\n",
       "      <td>Johnstown</td>\n",
       "      <td>Johnstown</td>\n",
       "      <td>Johnstown_PA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=hjRO5MsJ9u8</td>\n",
       "      <td>10</td>\n",
       "      <td>21</td>\n",
       "      <td>10_21_Johnstown_PA</td>\n",
       "      <td>hjRO5MsJ9u8</td>\n",
       "      <td>hjRO5MsJ9u8.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>iOud4RCw7xI.txt</td>\n",
       "      <td>19</td>\n",
       "      <td>86</td>\n",
       "      <td>1</td>\n",
       "      <td>5970</td>\n",
       "      <td>Kenansville</td>\n",
       "      <td>Jacksonville</td>\n",
       "      <td>Jacksonville_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=iOud4RCw7xI</td>\n",
       "      <td>9</td>\n",
       "      <td>20</td>\n",
       "      <td>9_20_Kenansville_NC</td>\n",
       "      <td>iOud4RCw7xI</td>\n",
       "      <td>iOud4RCw7xI.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>lp_dfFlxB1w.txt</td>\n",
       "      <td>41</td>\n",
       "      <td>107</td>\n",
       "      <td>1</td>\n",
       "      <td>6873</td>\n",
       "      <td>The Woodlands</td>\n",
       "      <td>Houston-The Woodlands-Sugar Land</td>\n",
       "      <td>Houston-The Woodlands-Sugar Land_TX</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-06-17</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=lp_dfFlxB1w</td>\n",
       "      <td>6</td>\n",
       "      <td>17</td>\n",
       "      <td>6_17_The Woodlands_TX</td>\n",
       "      <td>lp_dfFlxB1w</td>\n",
       "      <td>lp_dfFlxB1w.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>m7PKIFO2NAs.txt</td>\n",
       "      <td>44</td>\n",
       "      <td>43</td>\n",
       "      <td>1</td>\n",
       "      <td>5826</td>\n",
       "      <td>Fayetteville</td>\n",
       "      <td>Fayetteville</td>\n",
       "      <td>Fayetteville_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-08-09</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=m7PKIFO2NAs</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>8_9_Fayetteville_NC</td>\n",
       "      <td>m7PKIFO2NAs</td>\n",
       "      <td>m7PKIFO2NAs.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>p-CuQGDneyY.txt</td>\n",
       "      <td>28</td>\n",
       "      <td>72</td>\n",
       "      <td>1</td>\n",
       "      <td>5372</td>\n",
       "      <td>Concord</td>\n",
       "      <td>Charlotte-Concord-Gastonia</td>\n",
       "      <td>Charlotte-Concord-Gastonia_NC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=p-CuQGDneyY</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>11_3_Concord_NC</td>\n",
       "      <td>p-CuQGDneyY</td>\n",
       "      <td>p-CuQGDneyY.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>prYyDhIoGsE.txt</td>\n",
       "      <td>34</td>\n",
       "      <td>64</td>\n",
       "      <td>1</td>\n",
       "      <td>12451</td>\n",
       "      <td>Las Vegas</td>\n",
       "      <td>Las Vegas-Henderson-Paradise</td>\n",
       "      <td>Las Vegas-Henderson-Paradise_NV</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-10-30</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=prYyDhIoGsE</td>\n",
       "      <td>10</td>\n",
       "      <td>30</td>\n",
       "      <td>10_30_Las Vegas_NV</td>\n",
       "      <td>prYyDhIoGsE</td>\n",
       "      <td>prYyDhIoGsE.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>qKvWOqrYHqg.txt</td>\n",
       "      <td>31</td>\n",
       "      <td>83</td>\n",
       "      <td>1</td>\n",
       "      <td>7891</td>\n",
       "      <td>Cleveland</td>\n",
       "      <td>Cleveland-Elyria</td>\n",
       "      <td>Cleveland-Elyria_OH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=qKvWOqrYHqg</td>\n",
       "      <td>10</td>\n",
       "      <td>22</td>\n",
       "      <td>10_22_Cleveland_OH</td>\n",
       "      <td>qKvWOqrYHqg</td>\n",
       "      <td>qKvWOqrYHqg.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>qpl_D637fgA.txt</td>\n",
       "      <td>35</td>\n",
       "      <td>96</td>\n",
       "      <td>1</td>\n",
       "      <td>11718</td>\n",
       "      <td>Hershey</td>\n",
       "      <td>Harrisburg-Carlisle</td>\n",
       "      <td>Harrisburg-Carlisle_PA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=qpl_D637fgA</td>\n",
       "      <td>11</td>\n",
       "      <td>4</td>\n",
       "      <td>11_4_Hershey_PA</td>\n",
       "      <td>qpl_D637fgA</td>\n",
       "      <td>qpl_D637fgA.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>rjqAsbufsTI.txt</td>\n",
       "      <td>37</td>\n",
       "      <td>78</td>\n",
       "      <td>1</td>\n",
       "      <td>11824</td>\n",
       "      <td>Albuquerque</td>\n",
       "      <td>Albuquerque</td>\n",
       "      <td>Albuquerque_NM</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=rjqAsbufsTI</td>\n",
       "      <td>10</td>\n",
       "      <td>30</td>\n",
       "      <td>10_30_Albuquerque_NM</td>\n",
       "      <td>rjqAsbufsTI</td>\n",
       "      <td>rjqAsbufsTI.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>ws-obYHTf_o.txt</td>\n",
       "      <td>15</td>\n",
       "      <td>34</td>\n",
       "      <td>1</td>\n",
       "      <td>3003</td>\n",
       "      <td>Cedar Rapids</td>\n",
       "      <td>Cedar Rapids</td>\n",
       "      <td>Cedar Rapids_IA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=ws-obYHTf_o</td>\n",
       "      <td>10</td>\n",
       "      <td>28</td>\n",
       "      <td>10_28_Cedar Rapids_IA</td>\n",
       "      <td>ws-obYHTf_o</td>\n",
       "      <td>ws-obYHTf_o.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>zbruHdFvAf8.txt</td>\n",
       "      <td>22</td>\n",
       "      <td>79</td>\n",
       "      <td>1</td>\n",
       "      <td>5192</td>\n",
       "      <td>Atkinson</td>\n",
       "      <td>Boston-Cambridge-Newton</td>\n",
       "      <td>Boston-Cambridge-Newton_NH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2016-11-04</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=zbruHdFvAf8</td>\n",
       "      <td>11</td>\n",
       "      <td>4</td>\n",
       "      <td>11_4_Atkinson_NH</td>\n",
       "      <td>zbruHdFvAf8</td>\n",
       "      <td>zbruHdFvAf8.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>40 rows × 26 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   identifier  culture_count  \\\n",
       "0                             1cxw0HrFf00.txt             25   \n",
       "1                             2HoQKX6upm0.txt             24   \n",
       "2                             2R3eHzOMVOk.txt             25   \n",
       "3                             4SUYvWpGgEM.txt             53   \n",
       "4                             5e6vf02MbcI.txt             37   \n",
       "5      6QgzcPNrUU8_ab_channel=WorldBeatTV.txt             47   \n",
       "6                             7pZhhdJIWQg.txt              7   \n",
       "7                             8y4rwXdz0I0.txt             43   \n",
       "8          9ipj90ABxnY_ab_channel=ABCNews.txt             28   \n",
       "9                             A3QBA4F1r_k.txt             21   \n",
       "10                            BBPZIlj1Vf4.txt             18   \n",
       "11                            DLQyOn6cdME.txt             12   \n",
       "12                            G3d0a6UFNcE.txt             55   \n",
       "13                            GhdPUPD7puI.txt             18   \n",
       "14                            Iuw2yZ7jnw8.txt             12   \n",
       "15                            JZFZmNHdIuA.txt             23   \n",
       "16                            NwNGAcDVdsY.txt             28   \n",
       "17                            QhDlYABiWvY.txt             17   \n",
       "18         Wuy1M05DM38_ab_channel=ABCNews.txt             16   \n",
       "19                       Wuy1M05DM38_t=3s.txt             16   \n",
       "20                            XWu2nDNmreA.txt             34   \n",
       "21                            Yf2tnm_07Kw.txt             17   \n",
       "22                            YqDl4JtHAhU.txt             31   \n",
       "23                            aMZsKq99hdk.txt             33   \n",
       "24                            b3z7dIOAvEU.txt             10   \n",
       "25                      b3z7dIOAvEU_t=13s.txt             10   \n",
       "26  cZjTIVIFNOM_ab_channel=FactbaseVideos.txt             18   \n",
       "27  dFh73ll9DaY_ab_channel=LiveNOWfromFOX.txt             27   \n",
       "28                            h0vy5Myu5d4.txt             22   \n",
       "29                            hjRO5MsJ9u8.txt             18   \n",
       "30                            iOud4RCw7xI.txt             19   \n",
       "31                            lp_dfFlxB1w.txt             41   \n",
       "32                            m7PKIFO2NAs.txt             44   \n",
       "33                            p-CuQGDneyY.txt             28   \n",
       "34                            prYyDhIoGsE.txt             34   \n",
       "35                            qKvWOqrYHqg.txt             31   \n",
       "36                            qpl_D637fgA.txt             35   \n",
       "37                            rjqAsbufsTI.txt             37   \n",
       "38                            ws-obYHTf_o.txt             15   \n",
       "39                            zbruHdFvAf8.txt             22   \n",
       "\n",
       "    pro_worker_count  Total_sentences  word_count              City  \\\n",
       "0                 77                1       11312        Eau Claire   \n",
       "1                 67                1        7166          Portland   \n",
       "2                 77                1        7096         Fairfield   \n",
       "3                143                1       14147           Atlanta   \n",
       "4                125                1       12817      Jacksonville   \n",
       "5                 79                1        8019         Westfield   \n",
       "6                 64                1        8943  Colorado Springs   \n",
       "7                 79                1       13952             Selma   \n",
       "8                100                1        7747      Grand Rapids   \n",
       "9                 47                1        3721          Leesburg   \n",
       "10                76                1        4741            Toledo   \n",
       "11                64                1        5035        Greensboro   \n",
       "12                56                1        4938            Estero   \n",
       "13                56                1        2352     Moon Township   \n",
       "14                87                1        3601           Laconia   \n",
       "15                71                1        9441        Cincinnati   \n",
       "16               112                1        7259   St. Clairsville   \n",
       "17                70                1        4729  Colorado Springs   \n",
       "18                48                1        5157             Tampa   \n",
       "19                48                1        5157     St. Augustine   \n",
       "20                68                1        8042           Windham   \n",
       "21                83                1        4618  Sterling Heights   \n",
       "22               134                1       16168          Scranton   \n",
       "23               102                1       14713           Greeley   \n",
       "24                51                1        3173        Wilmington   \n",
       "25                51                1        3173        Wilmington   \n",
       "26               124                1        9579          Columbus   \n",
       "27                61                1        5662      Jacksonville   \n",
       "28                90                1        9516           Raleigh   \n",
       "29                73                1        5426         Johnstown   \n",
       "30                86                1        5970       Kenansville   \n",
       "31               107                1        6873     The Woodlands   \n",
       "32                43                1        5826      Fayetteville   \n",
       "33                72                1        5372           Concord   \n",
       "34                64                1       12451         Las Vegas   \n",
       "35                83                1        7891         Cleveland   \n",
       "36                96                1       11718           Hershey   \n",
       "37                78                1       11824       Albuquerque   \n",
       "38                34                1        3003      Cedar Rapids   \n",
       "39                79                1        5192          Atkinson   \n",
       "\n",
       "                                 MSA                            MSA_state  \\\n",
       "0                         Eau Claire                        Eau Claire_WI   \n",
       "1            Portland-South Portland           Portland-South Portland_ME   \n",
       "2        Bridgeport-Stamford-Norwalk       Bridgeport-Stamford-Norwalk_CT   \n",
       "3   Atlanta-Sandy Springs-Alpharetta  Atlanta-Sandy Springs-Alpharetta_GA   \n",
       "4                       Jacksonville                      Jacksonville_FL   \n",
       "5       Indianapolis-Carmel-Anderson      Indianapolis-Carmel-Anderson_IN   \n",
       "6                   Colorado Springs                  Colorado Springs_CO   \n",
       "7                       Raleigh-Cary                      Raleigh-Cary_NC   \n",
       "8              Grand Rapids-Kentwood             Grand Rapids-Kentwood_MI   \n",
       "9    Washington-Arlington-Alexandria   Washington-Arlington-Alexandria_VA   \n",
       "10                            Toledo                            Toledo_OH   \n",
       "11                     Winston-Salem                     Winston-Salem_NC   \n",
       "12             Cape Coral-Fort Myers             Cape Coral-Fort Myers_FL   \n",
       "13                        Pittsburgh                        Pittsburgh_PA   \n",
       "14                 Manchester-Nashua                 Manchester-Nashua_NH   \n",
       "15                        Cincinnati                        Cincinnati_OH   \n",
       "16              Weirton-Steubenville              Weirton-Steubenville_OH   \n",
       "17                  Colorado Springs                  Colorado Springs_CO   \n",
       "18   Tampa-St. Petersburg-Clearwater   Tampa-St. Petersburg-Clearwater_FL   \n",
       "19                      Jacksonville                      Jacksonville_FL   \n",
       "20           Boston-Cambridge-Newton           Boston-Cambridge-Newton_NH   \n",
       "21           Detroit–Warren–Dearborn           Detroit-Warren-Dearborn_MI   \n",
       "22             Scranton–Wilkes-Barre             Scranton-Wilkes-Barre_PA   \n",
       "23                           Greeley                           Greeley_CO   \n",
       "24                        Wilmington                        Wilmington_NC   \n",
       "25                        Cincinnati                        Cincinnati_OH   \n",
       "26                          Columbus                          Columbus_OH   \n",
       "27                      Jacksonville                      Jacksonville_FL   \n",
       "28                      Raleigh-Cary                      Raleigh-Cary_NC   \n",
       "29                         Johnstown                         Johnstown_PA   \n",
       "30                      Jacksonville                      Jacksonville_NC   \n",
       "31  Houston-The Woodlands-Sugar Land  Houston-The Woodlands-Sugar Land_TX   \n",
       "32                      Fayetteville                      Fayetteville_NC   \n",
       "33        Charlotte-Concord-Gastonia        Charlotte-Concord-Gastonia_NC   \n",
       "34      Las Vegas-Henderson-Paradise      Las Vegas-Henderson-Paradise_NV   \n",
       "35                  Cleveland-Elyria                  Cleveland-Elyria_OH   \n",
       "36               Harrisburg-Carlisle               Harrisburg-Carlisle_PA   \n",
       "37                       Albuquerque                       Albuquerque_NM   \n",
       "38                      Cedar Rapids                      Cedar Rapids_IA   \n",
       "39           Boston-Cambridge-Newton           Boston-Cambridge-Newton_NH   \n",
       "\n",
       "    New Date of Rally  ... Found (1 yes, 0 no)  \\\n",
       "0   NaN    2016-11-01  ...                 NaN   \n",
       "1   NaN    2016-08-04  ...                 0.5   \n",
       "2   NaN    2016-08-13  ...                 NaN   \n",
       "3   NaN    2016-06-15  ...                 0.5   \n",
       "4   NaN           NaT  ...                 0.5   \n",
       "5   NaN    2016-07-12  ...                 NaN   \n",
       "6   NaN    2016-07-29  ...                 NaN   \n",
       "7   NaN           NaT  ...                 NaN   \n",
       "8   NaN    2016-10-31  ...                 1.0   \n",
       "9   NaN           NaT  ...                 0.5   \n",
       "10  NaN           NaT  ...                 NaN   \n",
       "11  NaN    2016-10-14  ...                 NaN   \n",
       "12  NaN    2016-09-19  ...                 NaN   \n",
       "13  NaN           NaT  ...                 NaN   \n",
       "14  NaN    2016-09-15  ...                 NaN   \n",
       "15  NaN    2016-07-06  ...                 NaN   \n",
       "16  NaN    2016-06-28  ...                 0.5   \n",
       "17  NaN    2016-09-17  ...                 NaN   \n",
       "18  NaN    2016-11-05  ...                 NaN   \n",
       "19  NaN    2016-10-24  ...                 NaN   \n",
       "20  NaN    2016-08-06  ...                 NaN   \n",
       "21  NaN           NaT  ...                 NaN   \n",
       "22  NaN           NaT  ...                 NaN   \n",
       "23  NaN           NaT  ...                 NaN   \n",
       "24  NaN           NaT  ...                 NaN   \n",
       "25  NaN           NaT  ...                 NaN   \n",
       "26  NaN    2016-08-01  ...                 NaN   \n",
       "27  NaN    2016-11-03  ...                 NaN   \n",
       "28  NaN    2016-07-05  ...                 0.5   \n",
       "29  NaN           NaT  ...                 0.5   \n",
       "30  NaN           NaT  ...                 NaN   \n",
       "31  NaN    2016-06-17  ...                 0.5   \n",
       "32  NaN    2016-08-09  ...                 NaN   \n",
       "33  NaN           NaT  ...                 0.5   \n",
       "34  NaN    2016-10-30  ...                 0.5   \n",
       "35  NaN           NaT  ...                 NaN   \n",
       "36  NaN           NaT  ...                 NaN   \n",
       "37  NaN           NaT  ...                 NaN   \n",
       "38  NaN           NaT  ...                 0.5   \n",
       "39  NaN    2016-11-04  ...                 0.5   \n",
       "\n",
       "                                                 Link month day  \\\n",
       "0         https://www.youtube.com/watch?v=1cxw0HrFf00    11   1   \n",
       "1         https://www.youtube.com/watch?v=2HoQKX6upm0     8   4   \n",
       "2         https://www.youtube.com/watch?v=2R3eHzOMVOk     8  13   \n",
       "3         https://www.youtube.com/watch?v=4SUYvWpGgEM     6  15   \n",
       "4         https://www.youtube.com/watch?v=5e6vf02MbcI     8   3   \n",
       "5   https://www.youtube.com/watch?v=6QgzcPNrUU8&ab...     7  12   \n",
       "6         https://www.youtube.com/watch?v=7pZhhdJIWQg     7  29   \n",
       "7         https://www.youtube.com/watch?v=8y4rwXdz0I0    11   3   \n",
       "8   https://www.youtube.com/watch?v=9ipj90ABxnY&ab...    10  31   \n",
       "9         https://www.youtube.com/watch?v=A3QBA4F1r_k    11   6   \n",
       "10        https://www.youtube.com/watch?v=BBPZIlj1Vf4    10  27   \n",
       "11        https://www.youtube.com/watch?v=DLQyOn6cdME    10  14   \n",
       "12        https://www.youtube.com/watch?v=G3d0a6UFNcE     9  19   \n",
       "13        https://www.youtube.com/watch?v=GhdPUPD7puI    11   6   \n",
       "14        https://www.youtube.com/watch?v=Iuw2yZ7jnw8     9  15   \n",
       "15        https://www.youtube.com/watch?v=JZFZmNHdIuA     7   6   \n",
       "16        https://www.youtube.com/watch?v=NwNGAcDVdsY     6  28   \n",
       "17        https://www.youtube.com/watch?v=QhDlYABiWvY     9  17   \n",
       "18  https://www.youtube.com/watch?v=Wuy1M05DM38&ab...    11   5   \n",
       "19   https://www.youtube.com/watch?v=Wuy1M05DM38&t=3s    10  24   \n",
       "20        https://www.youtube.com/watch?v=XWu2nDNmreA     8   6   \n",
       "21        https://www.youtube.com/watch?v=Yf2tnm_07Kw    11   6   \n",
       "22        https://www.youtube.com/watch?v=YqDl4JtHAhU    11   7   \n",
       "23        https://www.youtube.com/watch?v=aMZsKq99hdk    10  30   \n",
       "24        https://www.youtube.com/watch?v=b3z7dIOAvEU    11   5   \n",
       "25  https://www.youtube.com/watch?v=b3z7dIOAvEU&t=13s    11   4   \n",
       "26  https://www.youtube.com/watch?v=cZjTIVIFNOM&ab...     8   1   \n",
       "27  https://www.youtube.com/watch?v=dFh73ll9DaY&ab...    11   3   \n",
       "28        https://www.youtube.com/watch?v=h0vy5Myu5d4     7   5   \n",
       "29        https://www.youtube.com/watch?v=hjRO5MsJ9u8    10  21   \n",
       "30        https://www.youtube.com/watch?v=iOud4RCw7xI     9  20   \n",
       "31        https://www.youtube.com/watch?v=lp_dfFlxB1w     6  17   \n",
       "32        https://www.youtube.com/watch?v=m7PKIFO2NAs     8   9   \n",
       "33        https://www.youtube.com/watch?v=p-CuQGDneyY    11   3   \n",
       "34        https://www.youtube.com/watch?v=prYyDhIoGsE    10  30   \n",
       "35        https://www.youtube.com/watch?v=qKvWOqrYHqg    10  22   \n",
       "36        https://www.youtube.com/watch?v=qpl_D637fgA    11   4   \n",
       "37        https://www.youtube.com/watch?v=rjqAsbufsTI    10  30   \n",
       "38        https://www.youtube.com/watch?v=ws-obYHTf_o    10  28   \n",
       "39        https://www.youtube.com/watch?v=zbruHdFvAf8    11   4   \n",
       "\n",
       "              filename_notxt                      file_name_youtube  \\\n",
       "0         11_1_Eau Claire_WI                            1cxw0HrFf00   \n",
       "1            8_4_Portland_ME                            2HoQKX6upm0   \n",
       "2          8_13_Fairfield_CT                            2R3eHzOMVOk   \n",
       "3            6_15_Atlanta_GA                            4SUYvWpGgEM   \n",
       "4        8_3_Jacksonville_FL                            5e6vf02MbcI   \n",
       "5          7_12_Westfield_IN     6QgzcPNrUU8&ab_channel=WorldBeatTV   \n",
       "6   7_29_Colorado Springs_CO                            7pZhhdJIWQg   \n",
       "7              11_3_Selma_NC                            8y4rwXdz0I0   \n",
       "8      10_31_Grand Rapids_MI         9ipj90ABxnY&ab_channel=ABCNews   \n",
       "9           11_6_Leesburg_VA                            A3QBA4F1r_k   \n",
       "10           10_27_Toledo_OH                            BBPZIlj1Vf4   \n",
       "11       10_14_Greensboro_NC                            DLQyOn6cdME   \n",
       "12            9_19_Estero_FL                            G3d0a6UFNcE   \n",
       "13     11_6_Moon Township_PA                            GhdPUPD7puI   \n",
       "14           9_15_Laconia_NH                            Iuw2yZ7jnw8   \n",
       "15         7_6_Cincinnati_OH                            JZFZmNHdIuA   \n",
       "16   6_28_St. Clairsville_OH                            NwNGAcDVdsY   \n",
       "17  9_17_Colorado Springs_CO                            QhDlYABiWvY   \n",
       "18             11_5_Tampa_FL         Wuy1M05DM38&ab_channel=ABCNews   \n",
       "19    10_24_St. Augustine_FL                       Wuy1M05DM38&t=3s   \n",
       "20            8_6_Windham_NH                            XWu2nDNmreA   \n",
       "21  11_6_Sterling Heights_MI                            Yf2tnm_07Kw   \n",
       "22          11_7_Scranton_PA                            YqDl4JtHAhU   \n",
       "23          10_30_Greeley_CO                            aMZsKq99hdk   \n",
       "24        11_5_Wilmington_NC                            b3z7dIOAvEU   \n",
       "25        11_4_Wilmington_OH                      b3z7dIOAvEU&t=13s   \n",
       "26           8_1_Columbus_OH  cZjTIVIFNOM&ab_channel=FactbaseVideos   \n",
       "27      11_3_Jacksonville_FL  dFh73ll9DaY&ab_channel=LiveNOWfromFOX   \n",
       "28            7_5_Raleigh_NC                            h0vy5Myu5d4   \n",
       "29        10_21_Johnstown_PA                            hjRO5MsJ9u8   \n",
       "30       9_20_Kenansville_NC                            iOud4RCw7xI   \n",
       "31     6_17_The Woodlands_TX                            lp_dfFlxB1w   \n",
       "32       8_9_Fayetteville_NC                            m7PKIFO2NAs   \n",
       "33           11_3_Concord_NC                            p-CuQGDneyY   \n",
       "34        10_30_Las Vegas_NV                            prYyDhIoGsE   \n",
       "35        10_22_Cleveland_OH                            qKvWOqrYHqg   \n",
       "36           11_4_Hershey_PA                            qpl_D637fgA   \n",
       "37      10_30_Albuquerque_NM                            rjqAsbufsTI   \n",
       "38     10_28_Cedar Rapids_IA                            ws-obYHTf_o   \n",
       "39          11_4_Atkinson_NH                            zbruHdFvAf8   \n",
       "\n",
       "                          file_name_youtubeee Youtube  blank  Unique  \n",
       "0                             1cxw0HrFf00.txt     1.0    NaN     NaN  \n",
       "1                             2HoQKX6upm0.txt     1.0    NaN     NaN  \n",
       "2                             2R3eHzOMVOk.txt     1.0    NaN     NaN  \n",
       "3                             4SUYvWpGgEM.txt     1.0    NaN     NaN  \n",
       "4                             5e6vf02MbcI.txt     1.0    NaN     NaN  \n",
       "5      6QgzcPNrUU8_ab_channel=WorldBeatTV.txt     1.0    NaN     NaN  \n",
       "6                             7pZhhdJIWQg.txt     1.0    NaN     NaN  \n",
       "7                             8y4rwXdz0I0.txt     1.0    NaN     NaN  \n",
       "8          9ipj90ABxnY_ab_channel=ABCNews.txt     1.0    NaN     NaN  \n",
       "9                             A3QBA4F1r_k.txt     1.0    NaN     NaN  \n",
       "10                            BBPZIlj1Vf4.txt     1.0    NaN     NaN  \n",
       "11                            DLQyOn6cdME.txt     1.0    NaN     1.0  \n",
       "12                            G3d0a6UFNcE.txt     1.0    NaN     NaN  \n",
       "13                            GhdPUPD7puI.txt     1.0    NaN     NaN  \n",
       "14                            Iuw2yZ7jnw8.txt     1.0    NaN     NaN  \n",
       "15                            JZFZmNHdIuA.txt     1.0    NaN     NaN  \n",
       "16                            NwNGAcDVdsY.txt     1.0    NaN     NaN  \n",
       "17                            QhDlYABiWvY.txt     1.0    NaN     NaN  \n",
       "18         Wuy1M05DM38_ab_channel=ABCNews.txt     1.0    NaN     NaN  \n",
       "19                       Wuy1M05DM38_t=3s.txt     1.0    NaN     NaN  \n",
       "20                            XWu2nDNmreA.txt     1.0    NaN     NaN  \n",
       "21                            Yf2tnm_07Kw.txt     1.0    NaN     NaN  \n",
       "22                            YqDl4JtHAhU.txt     1.0    NaN     NaN  \n",
       "23                            aMZsKq99hdk.txt     1.0    NaN     NaN  \n",
       "24                            b3z7dIOAvEU.txt     1.0    NaN     NaN  \n",
       "25                      b3z7dIOAvEU_t=13s.txt     1.0    NaN     NaN  \n",
       "26  cZjTIVIFNOM_ab_channel=FactbaseVideos.txt     1.0    NaN     NaN  \n",
       "27  dFh73ll9DaY_ab_channel=LiveNOWfromFOX.txt     1.0    NaN     NaN  \n",
       "28                            h0vy5Myu5d4.txt     1.0    NaN     NaN  \n",
       "29                            hjRO5MsJ9u8.txt     1.0    NaN     NaN  \n",
       "30                            iOud4RCw7xI.txt     1.0    NaN     NaN  \n",
       "31                            lp_dfFlxB1w.txt     1.0    NaN     NaN  \n",
       "32                            m7PKIFO2NAs.txt     1.0    NaN     NaN  \n",
       "33                            p-CuQGDneyY.txt     1.0    NaN     NaN  \n",
       "34                            prYyDhIoGsE.txt     1.0    NaN     NaN  \n",
       "35                            qKvWOqrYHqg.txt     1.0    NaN     NaN  \n",
       "36                            qpl_D637fgA.txt     1.0    NaN     NaN  \n",
       "37                            rjqAsbufsTI.txt     1.0    NaN     NaN  \n",
       "38                            ws-obYHTf_o.txt     1.0    NaN     NaN  \n",
       "39                            zbruHdFvAf8.txt     1.0    NaN     NaN  \n",
       "\n",
       "[40 rows x 26 columns]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "# Join the per-speech dictionary counts with the rally/MSA metadata on 'file_name'\n",
    "# (pd.merge defaults to an inner join), then expose the key column as 'identifier'.\n",
    "# NOTE(review): `sum` is the frame built in an earlier cell; the name shadows\n",
    "# Python's builtin sum() -- consider renaming it where it is defined.\n",
    "merged_df = (\n",
    "    pd.merge(sum, msa, on='file_name')\n",
    "    .rename(columns={'file_name': 'identifier'})\n",
    ")\n",
    "\n",
    "# Save the YouTube table for the append step below. The row index is written as\n",
    "# the first CSV column, which pandas reads back as 'Unnamed: 0'.\n",
    "merged_df.to_csv('trans_Y.csv')\n",
    "merged_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Append YouTube and Presidential speeches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# trans_Y.csv is the merged YouTube table written by the previous code cell;\n",
    "# trans.csv is presumably its presidential counterpart saved earlier (verify).\n",
    "# NOTE(review): the files carry their saved row index, so each frame loads\n",
    "# with an extra 'Unnamed: 0' column.\n",
    "youtube = pd.read_csv('./trans_Y.csv')\n",
    "presidential = pd.read_csv('./trans.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>identifier</th>\n",
       "      <th>culture_count</th>\n",
       "      <th>pro_worker_count</th>\n",
       "      <th>Total_sentences</th>\n",
       "      <th>word_count</th>\n",
       "      <th>City</th>\n",
       "      <th>MSA</th>\n",
       "      <th>MSA_state</th>\n",
       "      <th>New</th>\n",
       "      <th>...</th>\n",
       "      <th>Found (1 yes, 0 no)</th>\n",
       "      <th>Link</th>\n",
       "      <th>month</th>\n",
       "      <th>day</th>\n",
       "      <th>filename_notxt</th>\n",
       "      <th>file_name_youtube</th>\n",
       "      <th>file_name_youtubeee</th>\n",
       "      <th>Youtube</th>\n",
       "      <th>blank</th>\n",
       "      <th>Unique</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1cxw0HrFf00.txt</td>\n",
       "      <td>25</td>\n",
       "      <td>77</td>\n",
       "      <td>1</td>\n",
       "      <td>11312</td>\n",
       "      <td>Eau Claire</td>\n",
       "      <td>Eau Claire</td>\n",
       "      <td>Eau Claire_WI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=1cxw0HrFf00</td>\n",
       "      <td>11</td>\n",
       "      <td>1</td>\n",
       "      <td>11_1_Eau Claire_WI</td>\n",
       "      <td>1cxw0HrFf00</td>\n",
       "      <td>1cxw0HrFf00.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2HoQKX6upm0.txt</td>\n",
       "      <td>24</td>\n",
       "      <td>67</td>\n",
       "      <td>1</td>\n",
       "      <td>7166</td>\n",
       "      <td>Portland</td>\n",
       "      <td>Portland-South Portland</td>\n",
       "      <td>Portland-South Portland_ME</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=2HoQKX6upm0</td>\n",
       "      <td>8</td>\n",
       "      <td>4</td>\n",
       "      <td>8_4_Portland_ME</td>\n",
       "      <td>2HoQKX6upm0</td>\n",
       "      <td>2HoQKX6upm0.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>2R3eHzOMVOk.txt</td>\n",
       "      <td>25</td>\n",
       "      <td>77</td>\n",
       "      <td>1</td>\n",
       "      <td>7096</td>\n",
       "      <td>Fairfield</td>\n",
       "      <td>Bridgeport-Stamford-Norwalk</td>\n",
       "      <td>Bridgeport-Stamford-Norwalk_CT</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=2R3eHzOMVOk</td>\n",
       "      <td>8</td>\n",
       "      <td>13</td>\n",
       "      <td>8_13_Fairfield_CT</td>\n",
       "      <td>2R3eHzOMVOk</td>\n",
       "      <td>2R3eHzOMVOk.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>4SUYvWpGgEM.txt</td>\n",
       "      <td>53</td>\n",
       "      <td>143</td>\n",
       "      <td>1</td>\n",
       "      <td>14147</td>\n",
       "      <td>Atlanta</td>\n",
       "      <td>Atlanta-Sandy Springs-Alpharetta</td>\n",
       "      <td>Atlanta-Sandy Springs-Alpharetta_GA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=4SUYvWpGgEM</td>\n",
       "      <td>6</td>\n",
       "      <td>15</td>\n",
       "      <td>6_15_Atlanta_GA</td>\n",
       "      <td>4SUYvWpGgEM</td>\n",
       "      <td>4SUYvWpGgEM.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>5e6vf02MbcI.txt</td>\n",
       "      <td>37</td>\n",
       "      <td>125</td>\n",
       "      <td>1</td>\n",
       "      <td>12817</td>\n",
       "      <td>Jacksonville</td>\n",
       "      <td>Jacksonville</td>\n",
       "      <td>Jacksonville_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=5e6vf02MbcI</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>8_3_Jacksonville_FL</td>\n",
       "      <td>5e6vf02MbcI</td>\n",
       "      <td>5e6vf02MbcI.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93</th>\n",
       "      <td>53</td>\n",
       "      <td>9_24_Roanoke_VA.txt</td>\n",
       "      <td>10</td>\n",
       "      <td>360</td>\n",
       "      <td>88</td>\n",
       "      <td>2196</td>\n",
       "      <td>Roanoke</td>\n",
       "      <td>Roanoke</td>\n",
       "      <td>Roanoke_VA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>24</td>\n",
       "      <td>9_24_Roanoke_VA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>94</th>\n",
       "      <td>54</td>\n",
       "      <td>9_28_Council Bluffs_IA.txt</td>\n",
       "      <td>12</td>\n",
       "      <td>339</td>\n",
       "      <td>91</td>\n",
       "      <td>2024</td>\n",
       "      <td>Council Bluffs</td>\n",
       "      <td>Omaha-Council Bluffs</td>\n",
       "      <td>Omaha-Council Bluffs_IA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>28</td>\n",
       "      <td>9_28_Council Bluffs_IA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95</th>\n",
       "      <td>55</td>\n",
       "      <td>9_28_Waukesha_WI.txt</td>\n",
       "      <td>7</td>\n",
       "      <td>345</td>\n",
       "      <td>98</td>\n",
       "      <td>2126</td>\n",
       "      <td>Waukesha</td>\n",
       "      <td>Milwaukee-Waukesha</td>\n",
       "      <td>Milwaukee-Waukesha_WI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>28</td>\n",
       "      <td>9_28_Waukesha_WI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>56</td>\n",
       "      <td>9_29_Bedford_NH.txt</td>\n",
       "      <td>10</td>\n",
       "      <td>327</td>\n",
       "      <td>91</td>\n",
       "      <td>1873</td>\n",
       "      <td>Bedford</td>\n",
       "      <td>Manchester-Nashua</td>\n",
       "      <td>Manchester-Nashua_NH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>29</td>\n",
       "      <td>9_29_Bedford_NH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>57</td>\n",
       "      <td>9_30_Novi_MI.txt</td>\n",
       "      <td>10</td>\n",
       "      <td>394</td>\n",
       "      <td>115</td>\n",
       "      <td>2512</td>\n",
       "      <td>Novi</td>\n",
       "      <td>Detroit–Warren–Dearborn</td>\n",
       "      <td>Detroit–Warren–Dearborn_MI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>https://www.presidency.ucsb.edu/documents/rema...</td>\n",
       "      <td>9</td>\n",
       "      <td>30</td>\n",
       "      <td>9_30_Novi_MI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>98 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    Unnamed: 0                  identifier  culture_count  pro_worker_count  \\\n",
       "0            0             1cxw0HrFf00.txt             25                77   \n",
       "1            1             2HoQKX6upm0.txt             24                67   \n",
       "2            2             2R3eHzOMVOk.txt             25                77   \n",
       "3            3             4SUYvWpGgEM.txt             53               143   \n",
       "4            4             5e6vf02MbcI.txt             37               125   \n",
       "..         ...                         ...            ...               ...   \n",
       "93          53         9_24_Roanoke_VA.txt             10               360   \n",
       "94          54  9_28_Council Bluffs_IA.txt             12               339   \n",
       "95          55        9_28_Waukesha_WI.txt              7               345   \n",
       "96          56         9_29_Bedford_NH.txt             10               327   \n",
       "97          57            9_30_Novi_MI.txt             10               394   \n",
       "\n",
       "    Total_sentences  word_count            City  \\\n",
       "0                 1       11312      Eau Claire   \n",
       "1                 1        7166        Portland   \n",
       "2                 1        7096       Fairfield   \n",
       "3                 1       14147         Atlanta   \n",
       "4                 1       12817    Jacksonville   \n",
       "..              ...         ...             ...   \n",
       "93               88        2196         Roanoke   \n",
       "94               91        2024  Council Bluffs   \n",
       "95               98        2126        Waukesha   \n",
       "96               91        1873         Bedford   \n",
       "97              115        2512            Novi   \n",
       "\n",
       "                                 MSA                            MSA_state  \\\n",
       "0                         Eau Claire                        Eau Claire_WI   \n",
       "1            Portland-South Portland           Portland-South Portland_ME   \n",
       "2        Bridgeport-Stamford-Norwalk       Bridgeport-Stamford-Norwalk_CT   \n",
       "3   Atlanta-Sandy Springs-Alpharetta  Atlanta-Sandy Springs-Alpharetta_GA   \n",
       "4                       Jacksonville                      Jacksonville_FL   \n",
       "..                               ...                                  ...   \n",
       "93                           Roanoke                           Roanoke_VA   \n",
       "94              Omaha-Council Bluffs              Omaha-Council Bluffs_IA   \n",
       "95                Milwaukee-Waukesha                Milwaukee-Waukesha_WI   \n",
       "96                 Manchester-Nashua                 Manchester-Nashua_NH   \n",
       "97           Detroit–Warren–Dearborn           Detroit–Warren–Dearborn_MI   \n",
       "\n",
       "    New  ... Found (1 yes, 0 no)  \\\n",
       "0   NaN  ...                 NaN   \n",
       "1   NaN  ...                 0.5   \n",
       "2   NaN  ...                 NaN   \n",
       "3   NaN  ...                 0.5   \n",
       "4   NaN  ...                 0.5   \n",
       "..  ...  ...                 ...   \n",
       "93  NaN  ...                 1.0   \n",
       "94  NaN  ...                 1.0   \n",
       "95  NaN  ...                 1.0   \n",
       "96  NaN  ...                 1.0   \n",
       "97  NaN  ...                 1.0   \n",
       "\n",
       "                                                 Link month day  \\\n",
       "0         https://www.youtube.com/watch?v=1cxw0HrFf00    11   1   \n",
       "1         https://www.youtube.com/watch?v=2HoQKX6upm0     8   4   \n",
       "2         https://www.youtube.com/watch?v=2R3eHzOMVOk     8  13   \n",
       "3         https://www.youtube.com/watch?v=4SUYvWpGgEM     6  15   \n",
       "4         https://www.youtube.com/watch?v=5e6vf02MbcI     8   3   \n",
       "..                                                ...   ...  ..   \n",
       "93                                                NaN     9  24   \n",
       "94                                                NaN     9  28   \n",
       "95                                                NaN     9  28   \n",
       "96                                                NaN     9  29   \n",
       "97  https://www.presidency.ucsb.edu/documents/rema...     9  30   \n",
       "\n",
       "            filename_notxt file_name_youtube file_name_youtubeee  Youtube  \\\n",
       "0       11_1_Eau Claire_WI       1cxw0HrFf00     1cxw0HrFf00.txt      1.0   \n",
       "1          8_4_Portland_ME       2HoQKX6upm0     2HoQKX6upm0.txt      1.0   \n",
       "2        8_13_Fairfield_CT       2R3eHzOMVOk     2R3eHzOMVOk.txt      1.0   \n",
       "3          6_15_Atlanta_GA       4SUYvWpGgEM     4SUYvWpGgEM.txt      1.0   \n",
       "4      8_3_Jacksonville_FL       5e6vf02MbcI     5e6vf02MbcI.txt      1.0   \n",
       "..                     ...               ...                 ...      ...   \n",
       "93         9_24_Roanoke_VA               NaN                 NaN      NaN   \n",
       "94  9_28_Council Bluffs_IA               NaN                 NaN      NaN   \n",
       "95        9_28_Waukesha_WI               NaN                 NaN      NaN   \n",
       "96         9_29_Bedford_NH               NaN                 NaN      NaN   \n",
       "97            9_30_Novi_MI               NaN                 NaN      NaN   \n",
       "\n",
       "   blank  Unique  \n",
       "0    NaN     NaN  \n",
       "1    NaN     NaN  \n",
       "2    NaN     NaN  \n",
       "3    NaN     NaN  \n",
       "4    NaN     NaN  \n",
       "..   ...     ...  \n",
       "93   NaN     NaN  \n",
       "94   NaN     NaN  \n",
       "95   NaN     NaN  \n",
       "96   NaN     NaN  \n",
       "97   NaN     NaN  \n",
       "\n",
       "[98 rows x 27 columns]"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stack the YouTube rows on top of the presidential rows with pd.concat and\n",
    "# renumber the result 0..n-1 so the two sources do not share index labels.\n",
    "combined_df = pd.concat(\n",
    "    [youtube, presidential],\n",
    "    ignore_index=True,\n",
    ")\n",
    "\n",
    "combined_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Overwrite MSA_state for specific speeches, selected by the identifier column.\n",
    "# The four Detroit-area files get the label spelled with plain ASCII hyphens\n",
    "# (the loaded data uses en dashes for at least 9_30_Novi_MI.txt), and the Erie\n",
    "# file is set to 'Erie_PA'. All other rows keep their current value.\n",
    "# Series.mask replaces values where the condition is True, so no row-wise\n",
    "# apply(axis=1) is needed; the two conditions cannot overlap.\n",
    "combined_df['MSA_state'] = (\n",
    "    combined_df['MSA_state']\n",
    "    .mask(\n",
    "        combined_df['identifier'].isin([\n",
    "            '9_03_Detroit_MI.txt',\n",
    "            '8_8_Detroit_MI.txt',\n",
    "            '10_31_Warren_MI.txt',\n",
    "            '9_30_Novi_MI.txt',\n",
    "        ]),\n",
    "        'Detroit-Warren-Dearborn_MI',\n",
    "    )\n",
    "    .mask(combined_df['identifier'] == '8_12_Erie_PA.txt', 'Erie_PA')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0                           Eau Claire_WI\n",
      "1              Portland-South Portland_ME\n",
      "2          Bridgeport-Stamford-Norwalk_CT\n",
      "3     Atlanta-Sandy Springs-Alpharetta_GA\n",
      "4                         Jacksonville_FL\n",
      "                     ...                 \n",
      "93                             Roanoke_VA\n",
      "94                Omaha-Council Bluffs_IA\n",
      "95                  Milwaukee-Waukesha_WI\n",
      "96                   Manchester-Nashua_NH\n",
      "97             Detroit-Warren-Dearborn_MI\n",
      "Name: MSA_state, Length: 98, dtype: object\n"
     ]
    }
   ],
   "source": [
    "# Show the MSA_state column of combined_df to check the replacements above.\n",
    "# A bare last expression lets the notebook render the Series itself.\n",
    "combined_df['MSA_state']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the combined table to combined_df.csv in the working directory;\n",
    "# the row index is kept as the first (unnamed) column of the file.\n",
    "combined_df.to_csv('combined_df.csv', index=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>identifier</th>\n",
       "      <th>culture_count</th>\n",
       "      <th>pro_worker_count</th>\n",
       "      <th>Total_sentences</th>\n",
       "      <th>word_count</th>\n",
       "      <th>City</th>\n",
       "      <th>MSA</th>\n",
       "      <th>MSA_state</th>\n",
       "      <th>New</th>\n",
       "      <th>...</th>\n",
       "      <th>Found (1 yes, 0 no)</th>\n",
       "      <th>Link</th>\n",
       "      <th>month</th>\n",
       "      <th>day</th>\n",
       "      <th>filename_notxt</th>\n",
       "      <th>file_name_youtube</th>\n",
       "      <th>file_name_youtubeee</th>\n",
       "      <th>Youtube</th>\n",
       "      <th>blank</th>\n",
       "      <th>Unique</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1cxw0HrFf00.txt</td>\n",
       "      <td>25</td>\n",
       "      <td>77</td>\n",
       "      <td>1</td>\n",
       "      <td>11312</td>\n",
       "      <td>Eau Claire</td>\n",
       "      <td>Eau Claire</td>\n",
       "      <td>Eau Claire_WI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=1cxw0HrFf00</td>\n",
       "      <td>11</td>\n",
       "      <td>1</td>\n",
       "      <td>11_1_Eau Claire_WI</td>\n",
       "      <td>1cxw0HrFf00</td>\n",
       "      <td>1cxw0HrFf00.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2HoQKX6upm0.txt</td>\n",
       "      <td>24</td>\n",
       "      <td>67</td>\n",
       "      <td>1</td>\n",
       "      <td>7166</td>\n",
       "      <td>Portland</td>\n",
       "      <td>Portland-South Portland</td>\n",
       "      <td>Portland-South Portland_ME</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=2HoQKX6upm0</td>\n",
       "      <td>8</td>\n",
       "      <td>4</td>\n",
       "      <td>8_4_Portland_ME</td>\n",
       "      <td>2HoQKX6upm0</td>\n",
       "      <td>2HoQKX6upm0.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>2R3eHzOMVOk.txt</td>\n",
       "      <td>25</td>\n",
       "      <td>77</td>\n",
       "      <td>1</td>\n",
       "      <td>7096</td>\n",
       "      <td>Fairfield</td>\n",
       "      <td>Bridgeport-Stamford-Norwalk</td>\n",
       "      <td>Bridgeport-Stamford-Norwalk_CT</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://www.youtube.com/watch?v=2R3eHzOMVOk</td>\n",
       "      <td>8</td>\n",
       "      <td>13</td>\n",
       "      <td>8_13_Fairfield_CT</td>\n",
       "      <td>2R3eHzOMVOk</td>\n",
       "      <td>2R3eHzOMVOk.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>4SUYvWpGgEM.txt</td>\n",
       "      <td>53</td>\n",
       "      <td>143</td>\n",
       "      <td>1</td>\n",
       "      <td>14147</td>\n",
       "      <td>Atlanta</td>\n",
       "      <td>Atlanta-Sandy Springs-Alpharetta</td>\n",
       "      <td>Atlanta-Sandy Springs-Alpharetta_GA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=4SUYvWpGgEM</td>\n",
       "      <td>6</td>\n",
       "      <td>15</td>\n",
       "      <td>6_15_Atlanta_GA</td>\n",
       "      <td>4SUYvWpGgEM</td>\n",
       "      <td>4SUYvWpGgEM.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>5e6vf02MbcI.txt</td>\n",
       "      <td>37</td>\n",
       "      <td>125</td>\n",
       "      <td>1</td>\n",
       "      <td>12817</td>\n",
       "      <td>Jacksonville</td>\n",
       "      <td>Jacksonville</td>\n",
       "      <td>Jacksonville_FL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.5</td>\n",
       "      <td>https://www.youtube.com/watch?v=5e6vf02MbcI</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>8_3_Jacksonville_FL</td>\n",
       "      <td>5e6vf02MbcI</td>\n",
       "      <td>5e6vf02MbcI.txt</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93</th>\n",
       "      <td>53</td>\n",
       "      <td>9_24_Roanoke_VA.txt</td>\n",
       "      <td>10</td>\n",
       "      <td>360</td>\n",
       "      <td>88</td>\n",
       "      <td>2196</td>\n",
       "      <td>Roanoke</td>\n",
       "      <td>Roanoke</td>\n",
       "      <td>Roanoke_VA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>24</td>\n",
       "      <td>9_24_Roanoke_VA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>94</th>\n",
       "      <td>54</td>\n",
       "      <td>9_28_Council Bluffs_IA.txt</td>\n",
       "      <td>12</td>\n",
       "      <td>339</td>\n",
       "      <td>91</td>\n",
       "      <td>2024</td>\n",
       "      <td>Council Bluffs</td>\n",
       "      <td>Omaha-Council Bluffs</td>\n",
       "      <td>Omaha-Council Bluffs_IA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>28</td>\n",
       "      <td>9_28_Council Bluffs_IA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95</th>\n",
       "      <td>55</td>\n",
       "      <td>9_28_Waukesha_WI.txt</td>\n",
       "      <td>7</td>\n",
       "      <td>345</td>\n",
       "      <td>98</td>\n",
       "      <td>2126</td>\n",
       "      <td>Waukesha</td>\n",
       "      <td>Milwaukee-Waukesha</td>\n",
       "      <td>Milwaukee-Waukesha_WI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>28</td>\n",
       "      <td>9_28_Waukesha_WI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>56</td>\n",
       "      <td>9_29_Bedford_NH.txt</td>\n",
       "      <td>10</td>\n",
       "      <td>327</td>\n",
       "      <td>91</td>\n",
       "      <td>1873</td>\n",
       "      <td>Bedford</td>\n",
       "      <td>Manchester-Nashua</td>\n",
       "      <td>Manchester-Nashua_NH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>29</td>\n",
       "      <td>9_29_Bedford_NH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>57</td>\n",
       "      <td>9_30_Novi_MI.txt</td>\n",
       "      <td>10</td>\n",
       "      <td>394</td>\n",
       "      <td>115</td>\n",
       "      <td>2512</td>\n",
       "      <td>Novi</td>\n",
       "      <td>Detroit–Warren–Dearborn</td>\n",
       "      <td>Detroit-Warren-Dearborn_MI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>https://www.presidency.ucsb.edu/documents/rema...</td>\n",
       "      <td>9</td>\n",
       "      <td>30</td>\n",
       "      <td>9_30_Novi_MI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>98 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    Unnamed: 0                  identifier  culture_count  pro_worker_count  \\\n",
       "0            0             1cxw0HrFf00.txt             25                77   \n",
       "1            1             2HoQKX6upm0.txt             24                67   \n",
       "2            2             2R3eHzOMVOk.txt             25                77   \n",
       "3            3             4SUYvWpGgEM.txt             53               143   \n",
       "4            4             5e6vf02MbcI.txt             37               125   \n",
       "..         ...                         ...            ...               ...   \n",
       "93          53         9_24_Roanoke_VA.txt             10               360   \n",
       "94          54  9_28_Council Bluffs_IA.txt             12               339   \n",
       "95          55        9_28_Waukesha_WI.txt              7               345   \n",
       "96          56         9_29_Bedford_NH.txt             10               327   \n",
       "97          57            9_30_Novi_MI.txt             10               394   \n",
       "\n",
       "    Total_sentences  word_count            City  \\\n",
       "0                 1       11312      Eau Claire   \n",
       "1                 1        7166        Portland   \n",
       "2                 1        7096       Fairfield   \n",
       "3                 1       14147         Atlanta   \n",
       "4                 1       12817    Jacksonville   \n",
       "..              ...         ...             ...   \n",
       "93               88        2196         Roanoke   \n",
       "94               91        2024  Council Bluffs   \n",
       "95               98        2126        Waukesha   \n",
       "96               91        1873         Bedford   \n",
       "97              115        2512            Novi   \n",
       "\n",
       "                                 MSA                            MSA_state  \\\n",
       "0                         Eau Claire                        Eau Claire_WI   \n",
       "1            Portland-South Portland           Portland-South Portland_ME   \n",
       "2        Bridgeport-Stamford-Norwalk       Bridgeport-Stamford-Norwalk_CT   \n",
       "3   Atlanta-Sandy Springs-Alpharetta  Atlanta-Sandy Springs-Alpharetta_GA   \n",
       "4                       Jacksonville                      Jacksonville_FL   \n",
       "..                               ...                                  ...   \n",
       "93                           Roanoke                           Roanoke_VA   \n",
       "94              Omaha-Council Bluffs              Omaha-Council Bluffs_IA   \n",
       "95                Milwaukee-Waukesha                Milwaukee-Waukesha_WI   \n",
       "96                 Manchester-Nashua                 Manchester-Nashua_NH   \n",
       "97           Detroit–Warren–Dearborn           Detroit-Warren-Dearborn_MI   \n",
       "\n",
       "    New  ... Found (1 yes, 0 no)  \\\n",
       "0   NaN  ...                 NaN   \n",
       "1   NaN  ...                 0.5   \n",
       "2   NaN  ...                 NaN   \n",
       "3   NaN  ...                 0.5   \n",
       "4   NaN  ...                 0.5   \n",
       "..  ...  ...                 ...   \n",
       "93  NaN  ...                 1.0   \n",
       "94  NaN  ...                 1.0   \n",
       "95  NaN  ...                 1.0   \n",
       "96  NaN  ...                 1.0   \n",
       "97  NaN  ...                 1.0   \n",
       "\n",
       "                                                 Link month day  \\\n",
       "0         https://www.youtube.com/watch?v=1cxw0HrFf00    11   1   \n",
       "1         https://www.youtube.com/watch?v=2HoQKX6upm0     8   4   \n",
       "2         https://www.youtube.com/watch?v=2R3eHzOMVOk     8  13   \n",
       "3         https://www.youtube.com/watch?v=4SUYvWpGgEM     6  15   \n",
       "4         https://www.youtube.com/watch?v=5e6vf02MbcI     8   3   \n",
       "..                                                ...   ...  ..   \n",
       "93                                                NaN     9  24   \n",
       "94                                                NaN     9  28   \n",
       "95                                                NaN     9  28   \n",
       "96                                                NaN     9  29   \n",
       "97  https://www.presidency.ucsb.edu/documents/rema...     9  30   \n",
       "\n",
       "            filename_notxt file_name_youtube file_name_youtubeee  Youtube  \\\n",
       "0       11_1_Eau Claire_WI       1cxw0HrFf00     1cxw0HrFf00.txt      1.0   \n",
       "1          8_4_Portland_ME       2HoQKX6upm0     2HoQKX6upm0.txt      1.0   \n",
       "2        8_13_Fairfield_CT       2R3eHzOMVOk     2R3eHzOMVOk.txt      1.0   \n",
       "3          6_15_Atlanta_GA       4SUYvWpGgEM     4SUYvWpGgEM.txt      1.0   \n",
       "4      8_3_Jacksonville_FL       5e6vf02MbcI     5e6vf02MbcI.txt      1.0   \n",
       "..                     ...               ...                 ...      ...   \n",
       "93         9_24_Roanoke_VA               NaN                 NaN      NaN   \n",
       "94  9_28_Council Bluffs_IA               NaN                 NaN      NaN   \n",
       "95        9_28_Waukesha_WI               NaN                 NaN      NaN   \n",
       "96         9_29_Bedford_NH               NaN                 NaN      NaN   \n",
       "97            9_30_Novi_MI               NaN                 NaN      NaN   \n",
       "\n",
       "   blank  Unique  \n",
       "0    NaN     NaN  \n",
       "1    NaN     NaN  \n",
       "2    NaN     NaN  \n",
       "3    NaN     NaN  \n",
       "4    NaN     NaN  \n",
       "..   ...     ...  \n",
       "93   NaN     NaN  \n",
       "94   NaN     NaN  \n",
       "95   NaN     NaN  \n",
       "96   NaN     NaN  \n",
       "97   NaN     NaN  \n",
       "\n",
       "[98 rows x 27 columns]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "combined_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Deleating the transitory files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "trans_Y.csv has been deleted.\n",
      "trans.csv has been deleted.\n"
     ]
    }
   ],
   "source": [
    "# Define the file names\n",
    "files_to_delete = ['trans_Y.csv', 'trans.csv']\n",
    "\n",
    "# Loop through the file names and delete them\n",
    "for file in files_to_delete:\n",
    "    if os.path.exists(file):\n",
    "        os.remove(file)\n",
    "        print(f\"{file} has been deleted.\")\n",
    "    else:\n",
    "        print(f\"{file} not found in the current directory.\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
